blob: 93fdf3458eb03014f1d168fea0b87b73226587c3 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2000-2012 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
106 historical reasons.) [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardjb9e6d242013-05-11 13:42:08 +0000122/* Generation of addr-definedness, addr-validity and
123 guard-definedness checks pertaining to loads and stores (Iex_Load,
124 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
125 loads/stores) was re-checked 11 May 2013. */
126
sewardj95448072004-11-22 20:19:51 +0000127/*------------------------------------------------------------*/
128/*--- Forward decls ---*/
129/*------------------------------------------------------------*/
130
131struct _MCEnv;
132
sewardj7cf4e6b2008-05-01 20:24:26 +0000133static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000134static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000135static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000136
sewardjb5b87402011-03-07 16:05:35 +0000137static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000138
139/*------------------------------------------------------------*/
140/*--- Memcheck running state, and tmp management. ---*/
141/*------------------------------------------------------------*/
142
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;    /* role of this tmp: original, V-shadow or B-shadow */
      IRTemp   shadowV; /* definedness (V-bit) shadow, or IRTemp_INVALID */
      IRTemp   shadowB; /* origin-tracking (B-bit) shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
168
169
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      /* READONLY: if True, each added statement is printed (see
         stmt()) for debugging the instrumenter itself. */
      Bool trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
214
215/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
216 demand), as they are encountered. This is for two reasons.
217
218 (1) (less important reason): Many original tmps are unused due to
219 initial IR optimisation, and we do not want to spaces in tables
220 tracking them.
221
222 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
223 table indexed [0 .. n_types-1], which gives the current shadow for
224 each original tmp, or INVALID_IRTEMP if none is so far assigned.
225 It is necessary to support making multiple assignments to a shadow
226 -- specifically, after testing a shadow for definedness, it needs
227 to be made defined. But IR's SSA property disallows this.
228
229 (2) (more important reason): Therefore, when a shadow needs to get
230 a new value, a new temporary is created, the value is assigned to
231 that, and the tmpMap is updated to reflect the new binding.
232
233 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000234 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000235 there's a read-before-write error in the original tmps. The IR
236 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000237*/
sewardj95448072004-11-22 20:19:51 +0000238
sewardj1c0ce7a2009-07-01 08:10:49 +0000239/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
240 both the table in mce->sb and to our auxiliary mapping. Note that
241 newTemp may cause mce->tmpMap to resize, hence previous results
242 from VG_(indexXA)(mce->tmpMap) are invalidated. */
243static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
244{
245 Word newIx;
246 TempMapEnt ent;
247 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
248 ent.kind = kind;
249 ent.shadowV = IRTemp_INVALID;
250 ent.shadowB = IRTemp_INVALID;
251 newIx = VG_(addToXA)( mce->tmpMap, &ent );
252 tl_assert(newIx == (Word)tmp);
253 return tmp;
254}
255
256
sewardj95448072004-11-22 20:19:51 +0000257/* Find the tmp currently shadowing the given original tmp. If none
258 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000259static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000260{
sewardj1c0ce7a2009-07-01 08:10:49 +0000261 TempMapEnt* ent;
262 /* VG_(indexXA) range-checks 'orig', hence no need to check
263 here. */
264 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
265 tl_assert(ent->kind == Orig);
266 if (ent->shadowV == IRTemp_INVALID) {
267 IRTemp tmpV
268 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
269 /* newTemp may cause mce->tmpMap to resize, hence previous results
270 from VG_(indexXA) are invalid. */
271 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
272 tl_assert(ent->kind == Orig);
273 tl_assert(ent->shadowV == IRTemp_INVALID);
274 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000275 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000276 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000277}
278
sewardj95448072004-11-22 20:19:51 +0000279/* Allocate a new shadow for the given original tmp. This means any
280 previous shadow is abandoned. This is needed because it is
281 necessary to give a new value to a shadow once it has been tested
282 for undefinedness, but unfortunately IR's SSA property disallows
283 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000284 and use that instead.
285
286 This is the same as findShadowTmpV, except we don't bother to see
287 if a shadow temp already existed -- we simply allocate a new one
288 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000289static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000290{
sewardj1c0ce7a2009-07-01 08:10:49 +0000291 TempMapEnt* ent;
292 /* VG_(indexXA) range-checks 'orig', hence no need to check
293 here. */
294 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
295 tl_assert(ent->kind == Orig);
296 if (1) {
297 IRTemp tmpV
298 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
299 /* newTemp may cause mce->tmpMap to resize, hence previous results
300 from VG_(indexXA) are invalid. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 ent->shadowV = tmpV;
304 }
sewardj95448072004-11-22 20:19:51 +0000305}
306
307
308/*------------------------------------------------------------*/
309/*--- IRAtoms -- a subset of IRExprs ---*/
310/*------------------------------------------------------------*/
311
312/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000313 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000314 input, most of this code deals in atoms. Usefully, a value atom
315 always has a V-value which is also an atom: constants are shadowed
316 by constants, and temps are shadowed by the corresponding shadow
317 temporary. */
318
319typedef IRExpr IRAtom;
320
321/* (used for sanity checks only): is this an atom which looks
322 like it's from original code? */
323static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
324{
325 if (a1->tag == Iex_Const)
326 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000327 if (a1->tag == Iex_RdTmp) {
328 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
329 return ent->kind == Orig;
330 }
sewardj95448072004-11-22 20:19:51 +0000331 return False;
332}
333
334/* (used for sanity checks only): is this an atom which looks
335 like it's from shadow code? */
336static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
337{
338 if (a1->tag == Iex_Const)
339 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000340 if (a1->tag == Iex_RdTmp) {
341 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
342 return ent->kind == VSh || ent->kind == BSh;
343 }
sewardj95448072004-11-22 20:19:51 +0000344 return False;
345}
346
347/* (used for sanity checks only): check that both args are atoms and
348 are identically-kinded. */
349static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
350{
sewardj0b9d74a2006-12-24 02:24:11 +0000351 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000352 return True;
sewardjbef552a2005-08-30 12:54:36 +0000353 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000354 return True;
355 return False;
356}
357
358
359/*------------------------------------------------------------*/
360/*--- Type management ---*/
361/*------------------------------------------------------------*/
362
363/* Shadow state is always accessed using integer types. This returns
364 an integer type with the same size (as per sizeofIRType) as the
365 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000366 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000367
sewardj7cf4e6b2008-05-01 20:24:26 +0000368static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000369{
370 switch (ty) {
371 case Ity_I1:
372 case Ity_I8:
373 case Ity_I16:
374 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000375 case Ity_I64:
376 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000377 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000378 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000379 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000380 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000381 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000382 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000383 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000384 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000385 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000386 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000387 }
388}
389
390/* Produce a 'defined' value of the given shadow type. Should only be
391 supplied shadow types (Bit/I8/I16/I32/UI64). */
392static IRExpr* definedOfType ( IRType ty ) {
393 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000394 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
395 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
396 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
397 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
398 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000399 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000400 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000401 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000402 }
403}
404
405
sewardj95448072004-11-22 20:19:51 +0000406/*------------------------------------------------------------*/
407/*--- Constructing IR fragments ---*/
408/*------------------------------------------------------------*/
409
/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   /* When tracing is on, print each statement as it is added, tagged
      with its category char (the same 'cat' later passed to
      assignNew: 'V', 'B' or 'C'). */
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
419
/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   /* Emit "tmp = expr" via stmt(), so it is traced like any other
      added statement. */
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000425
426/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000427#define triop(_op, _arg1, _arg2, _arg3) \
428 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000429#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
430#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
sewardjcc961652013-01-26 11:49:15 +0000431#define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
sewardj95448072004-11-22 20:19:51 +0000432#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
433#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
434#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
435#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000436#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000437#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000438
sewardj7cf4e6b2008-05-01 20:24:26 +0000439/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000440 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000441 an atom.
442
443 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000444 needs to be. But passing it in is redundant, since we can deduce
445 the type merely by inspecting 'e'. So at least use that fact to
446 assert that the two types agree. */
447static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
448{
449 TempKind k;
450 IRTemp t;
451 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000452
sewardj7cf4e6b2008-05-01 20:24:26 +0000453 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000454 switch (cat) {
455 case 'V': k = VSh; break;
456 case 'B': k = BSh; break;
457 case 'C': k = Orig; break;
458 /* happens when we are making up new "orig"
459 expressions, for IRCAS handling */
460 default: tl_assert(0);
461 }
462 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000463 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000464 return mkexpr(t);
465}
466
467
468/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000469/*--- Helper functions for 128-bit ops ---*/
470/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000471
sewardjb5b87402011-03-07 16:05:35 +0000472static IRExpr *i128_const_zero(void)
473{
sewardj45fa9f42012-05-21 10:18:10 +0000474 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
475 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000476}
477
sewardj45fa9f42012-05-21 10:18:10 +0000478/* There are no I128-bit loads and/or stores [as generated by any
479 current front ends]. So we do not need to worry about that in
480 expr2vbits_Load */
481
sewardjb5b87402011-03-07 16:05:35 +0000482
483/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000484/*--- Constructing definedness primitive ops ---*/
485/*------------------------------------------------------------*/
486
487/* --------- Defined-if-either-defined --------- */
488
489static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000492 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000493}
494
495static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000498 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000499}
500
501static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000504 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000505}
506
sewardj7010f6e2004-12-10 13:35:22 +0000507static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000510 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000511}
512
sewardj20d38f22005-02-07 23:50:18 +0000513static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000516 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000517}
518
sewardj350e8f72012-06-25 07:52:15 +0000519static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
520 tl_assert(isShadowAtom(mce,a1));
521 tl_assert(isShadowAtom(mce,a2));
522 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
523}
524
sewardj95448072004-11-22 20:19:51 +0000525/* --------- Undefined-if-either-undefined --------- */
526
527static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000536 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000537}
538
539static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000542 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000543}
544
545static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 tl_assert(isShadowAtom(mce,a1));
547 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000548 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000549}
550
sewardjb5b87402011-03-07 16:05:35 +0000551static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
552 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
553 tl_assert(isShadowAtom(mce,a1));
554 tl_assert(isShadowAtom(mce,a2));
555 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
556 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
557 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
558 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
559 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
560 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
561
562 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
563}
564
sewardj20d38f22005-02-07 23:50:18 +0000565static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000568 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000569}
570
sewardj350e8f72012-06-25 07:52:15 +0000571static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
572 tl_assert(isShadowAtom(mce,a1));
573 tl_assert(isShadowAtom(mce,a2));
574 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
575}
576
sewardje50a1b12004-12-17 01:24:54 +0000577static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000578 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000579 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000580 case Ity_I16: return mkUifU16(mce, a1, a2);
581 case Ity_I32: return mkUifU32(mce, a1, a2);
582 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000583 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000584 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardja2f30952013-03-27 11:40:02 +0000585 case Ity_V256: return mkUifUV256(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000586 default:
587 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
588 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000589 }
590}
591
sewardj95448072004-11-22 20:19:51 +0000592/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000593
sewardj95448072004-11-22 20:19:51 +0000594static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
595 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000596 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000597}
598
599static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
600 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000601 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000602}
603
604static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
605 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000606 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000607}
608
sewardj681be302005-01-15 20:43:58 +0000609static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
610 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000611 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000612}
613
sewardj95448072004-11-22 20:19:51 +0000614/* --------- 'Improvement' functions for AND/OR. --------- */
615
616/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
617 defined (0); all other -> undefined (1).
618*/
619static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000620{
sewardj95448072004-11-22 20:19:51 +0000621 tl_assert(isOriginalAtom(mce, data));
622 tl_assert(isShadowAtom(mce, vbits));
623 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000624 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000625}
njn25e49d8e72002-09-23 09:36:25 +0000626
sewardj95448072004-11-22 20:19:51 +0000627static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
628{
629 tl_assert(isOriginalAtom(mce, data));
630 tl_assert(isShadowAtom(mce, vbits));
631 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000632 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000633}
njn25e49d8e72002-09-23 09:36:25 +0000634
sewardj95448072004-11-22 20:19:51 +0000635static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
636{
637 tl_assert(isOriginalAtom(mce, data));
638 tl_assert(isShadowAtom(mce, vbits));
639 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000640 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000641}
njn25e49d8e72002-09-23 09:36:25 +0000642
sewardj7010f6e2004-12-10 13:35:22 +0000643static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
644{
645 tl_assert(isOriginalAtom(mce, data));
646 tl_assert(isShadowAtom(mce, vbits));
647 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000648 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000649}
650
sewardj20d38f22005-02-07 23:50:18 +0000651static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000652{
653 tl_assert(isOriginalAtom(mce, data));
654 tl_assert(isShadowAtom(mce, vbits));
655 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000656 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000657}
658
sewardj350e8f72012-06-25 07:52:15 +0000659static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
660{
661 tl_assert(isOriginalAtom(mce, data));
662 tl_assert(isShadowAtom(mce, vbits));
663 tl_assert(sameKindedAtoms(data, vbits));
664 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
665}
666
sewardj95448072004-11-22 20:19:51 +0000667/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
668 defined (0); all other -> undefined (1).
669*/
670static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
671{
672 tl_assert(isOriginalAtom(mce, data));
673 tl_assert(isShadowAtom(mce, vbits));
674 tl_assert(sameKindedAtoms(data, vbits));
675 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000676 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000677 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000678 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000679 vbits) );
680}
njn25e49d8e72002-09-23 09:36:25 +0000681
sewardj95448072004-11-22 20:19:51 +0000682static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
683{
684 tl_assert(isOriginalAtom(mce, data));
685 tl_assert(isShadowAtom(mce, vbits));
686 tl_assert(sameKindedAtoms(data, vbits));
687 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000688 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000689 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000690 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000691 vbits) );
692}
njn25e49d8e72002-09-23 09:36:25 +0000693
sewardj95448072004-11-22 20:19:51 +0000694static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
695{
696 tl_assert(isOriginalAtom(mce, data));
697 tl_assert(isShadowAtom(mce, vbits));
698 tl_assert(sameKindedAtoms(data, vbits));
699 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000700 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000701 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000702 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000703 vbits) );
704}
705
sewardj7010f6e2004-12-10 13:35:22 +0000706static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
707{
708 tl_assert(isOriginalAtom(mce, data));
709 tl_assert(isShadowAtom(mce, vbits));
710 tl_assert(sameKindedAtoms(data, vbits));
711 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000712 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000713 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000714 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000715 vbits) );
716}
717
sewardj20d38f22005-02-07 23:50:18 +0000718static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000719{
720 tl_assert(isOriginalAtom(mce, data));
721 tl_assert(isShadowAtom(mce, vbits));
722 tl_assert(sameKindedAtoms(data, vbits));
723 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000724 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000725 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000726 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000727 vbits) );
728}
729
sewardj350e8f72012-06-25 07:52:15 +0000730static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
731{
732 tl_assert(isOriginalAtom(mce, data));
733 tl_assert(isShadowAtom(mce, vbits));
734 tl_assert(sameKindedAtoms(data, vbits));
735 return assignNew(
736 'V', mce, Ity_V256,
737 binop(Iop_OrV256,
738 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
739 vbits) );
740}
741
sewardj95448072004-11-22 20:19:51 +0000742/* --------- Pessimising casts. --------- */
743
sewardjb5b87402011-03-07 16:05:35 +0000744/* The function returns an expression of type DST_TY. If any of the VBITS
745 is undefined (value == 1) the resulting expression has all bits set to
746 1. Otherwise, all bits are 0. */
747
/* Pessimising cast: build an expression of shadow type |dst_ty| which
   is all-1s (undefined) if any bit of |vbits| is 1, and all-0s
   (defined) otherwise.  |vbits| must be a shadow atom; its source
   type is recovered from the type environment. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ* does the whole job
      (0 -> 0...0, nonzero -> 1...1) in one IR op. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it into both halves of the I64. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;  /* already a single bit */
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew('V', mce, Ity_I1,
                               unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type, sign-replicating the single bit
      so the result is all-0s or all-1s. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* widen 1 -> 64, then duplicate into both V128 halves */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         /* widen in three steps: 1 -> 64 -> V128 -> V256 */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
843
sewardjd5204dc2004-12-31 01:16:11 +0000844/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
845/*
846 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
847 PCasting to Ity_U1. However, sometimes it is necessary to be more
848 accurate. The insight is that the result is defined if two
849 corresponding bits can be found, one from each argument, so that
850 both bits are defined but are different -- that makes EQ say "No"
851 and NE say "Yes". Hence, we compute an improvement term and DifD
852 it onto the "normal" (UifU) result.
853
854 The result is:
855
856 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000857 -- naive version
858 PCastTo<sz>( UifU<sz>(vxx, vyy) )
859
sewardjd5204dc2004-12-31 01:16:11 +0000860 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000861
862 -- improvement term
863 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000864 )
sewardje6f8af42005-07-06 18:48:59 +0000865
sewardjd5204dc2004-12-31 01:16:11 +0000866 where
867 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000868 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000869
sewardje6f8af42005-07-06 18:48:59 +0000870 vec = Or<sz>( vxx, // 0 iff bit defined
871 vyy, // 0 iff bit defined
872 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
873 )
874
875 If any bit of vec is 0, the result is defined and so the
876 improvement term should produce 0...0, else it should produce
877 1...1.
878
879 Hence require for the improvement term:
880
881 if vec == 1...1 then 1...1 else 0...0
882 ->
883 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
884
885 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000886*/
/* Accurate CmpEQ/CmpNE interpretation: see the long comment above for
   the derivation.  Computes the naive (UifU-based) result, then ANDs
   in an improvement term which is all-0s (defined) whenever some bit
   position has both operand bits defined but different. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the size-specific ops and the all-1s constant. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast<sz>( UifU<sz>(vxx, vyy) ) */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec: 0 bits exactly where the arg bits are defined-and-different. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement term: all-1s iff vec == 1...1 (no deciding bit pair). */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Squash to the single-bit definedness of the comparison result. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
961
sewardj95448072004-11-22 20:19:51 +0000962
sewardj992dff92005-10-07 11:08:55 +0000963/* --------- Semi-accurate interpretation of CmpORD. --------- */
964
965/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
966
967 CmpORD32S(x,y) = 1<<3 if x <s y
968 = 1<<2 if x >s y
969 = 1<<1 if x == y
970
971 and similarly the unsigned variant. The default interpretation is:
972
973 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000974 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000975
976 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
977 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000978
979 Also deal with a special case better:
980
981 CmpORD32S(x,0)
982
983 Here, bit 3 (LT) of the result is a copy of the top bit of x and
984 will be defined even if the rest of x isn't. In which case we do:
985
986 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000987 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
988 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000989
sewardj1bc82102005-12-23 00:16:24 +0000990 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000991*/
sewardja9e62a92005-10-07 12:13:21 +0000992static Bool isZeroU32 ( IRAtom* e )
993{
994 return
995 toBool( e->tag == Iex_Const
996 && e->Iex.Const.con->tag == Ico_U32
997 && e->Iex.Const.con->Ico.U32 == 0 );
998}
999
sewardj1bc82102005-12-23 00:16:24 +00001000static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +00001001{
sewardj1bc82102005-12-23 00:16:24 +00001002 return
1003 toBool( e->tag == Iex_Const
1004 && e->Iex.Const.con->tag == Ico_U64
1005 && e->Iex.Const.con->Ico.U64 == 0 );
1006}
1007
/* Instrument CmpORD{32,64}{S,U} (PowerPC 3-way compares) per the
   scheme documented above: the default is PCast(UifU) masked to bits
   3..1; the signed compare-against-zero case additionally recovers
   the LT bit directly from the top bit of the operand's shadow. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Select 32- or 64-bit flavours of everything up front. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      /* debug printing, normally disabled */
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(xx#) & (3<<1))        -- GT#,EQ# as usual
         | ((xx# >>u width-1) << 3)   -- LT# is just the sign bit's vbit */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* bits other than 3..1 are always zero, hence defined */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1078
1079
sewardj95448072004-11-22 20:19:51 +00001080/*------------------------------------------------------------*/
1081/*--- Emit a test and complaint if something is undefined. ---*/
1082/*------------------------------------------------------------*/
1083
sewardj7cf4e6b2008-05-01 20:24:26 +00001084static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1085
1086
sewardj95448072004-11-22 20:19:51 +00001087/* Set the annotations on a dirty helper to indicate that the stack
1088 pointer and instruction pointers might be read. This is the
1089 behaviour of all 'emit-a-complaint' style functions we might
1090 call. */
1091
1092static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1093 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001094 di->fxState[0].fx = Ifx_Read;
1095 di->fxState[0].offset = mce->layout->offset_SP;
1096 di->fxState[0].size = mce->layout->sizeof_SP;
1097 di->fxState[0].nRepeats = 0;
1098 di->fxState[0].repeatLen = 0;
1099 di->fxState[1].fx = Ifx_Read;
1100 di->fxState[1].offset = mce->layout->offset_IP;
1101 di->fxState[1].size = mce->layout->sizeof_IP;
1102 di->fxState[1].nRepeats = 0;
1103 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001104}
1105
1106
sewardjcafe5052013-01-17 14:24:35 +00001107/* Check the supplied *original* |atom| for undefinedness, and emit a
sewardj95448072004-11-22 20:19:51 +00001108 complaint if so. Once that happens, mark it as defined. This is
1109 possible because the atom is either a tmp or literal. If it's a
1110 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1111 be defined. In fact as mentioned above, we will have to allocate a
1112 new tmp to carry the new 'defined' shadow value, and update the
1113 original->tmp mapping accordingly; we cannot simply assign a new
sewardjcafe5052013-01-17 14:24:35 +00001114 value to an existing shadow tmp as this breaks SSAness.
1115
sewardjb9e6d242013-05-11 13:42:08 +00001116 The checks are performed, any resulting complaint emitted, and
1117 |atom|'s shadow temp set to 'defined', ONLY in the case that
1118 |guard| evaluates to True at run-time. If it evaluates to False
1119 then no action is performed. If |guard| is NULL (the usual case)
1120 then it is assumed to be always-true, and hence these actions are
1121 performed unconditionally.
1122
1123 This routine does not generate code to check the definedness of
1124 |guard|. The caller is assumed to have taken care of that already.
sewardj95448072004-11-22 20:19:51 +00001125*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* origins are 32 bits; widen to fit a 64-bit host word arg */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the failure helper by value size, and the with-origin or
      no-origin variant depending on whether origin tracking is on. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* sizes with no dedicated helper: pass sz explicitly */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1302
1303
1304/*------------------------------------------------------------*/
1305/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1306/*------------------------------------------------------------*/
1307
1308/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1310 partially fall into such a region: (offset,size) should either be
1311 completely in such a region or completely not-in such a region.
1312*/
1313static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1314{
1315 Int minoffD, maxoffD, i;
1316 Int minoff = offset;
1317 Int maxoff = minoff + size - 1;
1318 tl_assert((minoff & ~0xFFFF) == 0);
1319 tl_assert((maxoff & ~0xFFFF) == 0);
1320
1321 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1322 minoffD = mce->layout->alwaysDefd[i].offset;
1323 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1324 tl_assert((minoffD & ~0xFFFF) == 0);
1325 tl_assert((maxoffD & ~0xFFFF) == 0);
1326
1327 if (maxoff < minoffD || maxoffD < minoff)
1328 continue; /* no overlap */
1329 if (minoff >= minoffD && maxoff <= maxoffD)
1330 return True; /* completely contained in an always-defd section */
1331
1332 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1333 }
1334 return False; /* could not find any containing section */
1335}
1336
1337
1338/* Generate into bb suitable actions to shadow this Put. If the state
1339 slice is marked 'always defined', do nothing. Otherwise, write the
1340 supplied V bits to the shadow state. We can pass in either an
1341 original atom or a V-atom, but not both. In the former case the
1342 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001343 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001344*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of |atom| (original) and |vatom| (shadow) must be
      supplied; derive the shadow from the original if needed. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      /* shadow state lives at a fixed displacement past the real state */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1388
1389
/* Generate into bb suitable actions to shadow this PutI.  This is the
   indexed-register analogue of do_shadow_PUT: compute the V bits for
   puti->data and write them to the corresponding slice of the shadow
   state, unless that state slice is marked 'always defined'.
*/
1393static
floriand39b0222012-05-31 15:48:13 +00001394void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001395{
sewardj7cf97ee2004-11-28 14:25:01 +00001396 IRAtom* vatom;
1397 IRType ty, tyS;
1398 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001399 IRRegArray* descr = puti->descr;
1400 IRAtom* ix = puti->ix;
1401 Int bias = puti->bias;
1402 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001403
njn1d0825f2006-03-27 11:37:07 +00001404 // Don't do shadow PUTIs if we're not doing undefined value checking.
1405 // Their absence lets Vex's optimiser remove all the shadow computation
1406 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001407 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001408 return;
1409
sewardj95448072004-11-22 20:19:51 +00001410 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001411 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001412 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001413 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001414 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001415 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001416 tl_assert(ty != Ity_I1);
1417 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001418 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001419 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1420 /* later: no ... */
1421 /* emit code to emit a complaint if any of the vbits are 1. */
1422 /* complainIfUndefined(mce, atom); */
1423 } else {
1424 /* Do a cloned version of the Put that refers to the shadow
1425 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001426 IRRegArray* new_descr
1427 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1428 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001429 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001430 }
1431}
1432
1433
1434/* Return an expression which contains the V bits corresponding to the
1435 given GET (passed in in pieces).
1436*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   /* tyS is the type of the shadow (V-bit) value for 'ty'. */
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
1453
1454
1455/* Return an expression which contains the V bits corresponding to the
1456 given GETI (passed in in pieces).
1457*/
1458static
sewardj0b9d74a2006-12-24 02:24:11 +00001459IRExpr* shadow_GETI ( MCEnv* mce,
1460 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001461{
1462 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001463 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001464 Int arrSize = descr->nElems * sizeofIRType(ty);
1465 tl_assert(ty != Ity_I1);
1466 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001467 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001468 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1469 /* Always defined, return all zeroes of the relevant type */
1470 return definedOfType(tyS);
1471 } else {
1472 /* return a cloned version of the Get that refers to the shadow
1473 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001474 IRRegArray* new_descr
1475 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1476 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001477 return IRExpr_GetI( new_descr, ix, bias );
1478 }
1479}
1480
1481
1482/*------------------------------------------------------------*/
1483/*--- Generating approximations for unknown operations, ---*/
1484/*--- using lazy-propagate semantics ---*/
1485/*------------------------------------------------------------*/
1486
1487/* Lazy propagation of undefinedness from two values, resulting in the
1488 specified shadow type.
1489*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      /* 'if (0)' guards are debug tracing, normally compiled out. */
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Debug tracing for combinations not handled specially above. */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
1535
1536
sewardjed69fdb2006-02-03 16:12:27 +00001537/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
      */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with. 1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   /* Unconditional diagnostic before the assert below, so an
      unhandled type combination is identified in the output. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* Deliberate hard failure: a new type combination needs its own
      special case above.  The disabled code below shows what a
      general fallback would look like. */
   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1669
1670
sewardje91cea72006-02-08 19:32:02 +00001671/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which need fewer PCasts
      than a naive per-argument scheme would. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unconditional diagnostic before the assert below, so an
      unhandled type combination is identified in the output. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* Deliberate hard failure: a new type combination needs its own
      special case above. */
   tl_assert(0);
}
1737
1738
sewardj95448072004-11-22 20:19:51 +00001739/* Do the lazy propagation game from a null-terminated vector of
1740 atoms. This is presumably the arguments to a helper call, so the
1741 IRCallee info is also supplied in order that we can know which
1742 arguments should be ignored (via the .mcx_mask field).
1743*/
static
IRAtom* mkLazyN ( MCEnv* mce, 
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Args excluded via the callee's mcx_mask do not influence the
         choice of intermediary type either. */
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   /* Start the accumulator as 'fully defined'; UifU below can only
      make it less defined. */
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64 
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
1789
1790
1791/*------------------------------------------------------------*/
1792/*--- Generating expensive sequences for exact carry-chain ---*/
1793/*--- propagation in add/sub and related operations. ---*/
1794/*------------------------------------------------------------*/
1795
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb, 
                          IRAtom* aa,  IRAtom* bb )
{
   /* aa/bb are the original operands, qaa/qbb their V-bit shadows.
      a_min/a_max are the values aa takes when its undefined bits are
      forced to 0 resp. 1 (likewise b_min/b_max for bb).  A result bit
      which agrees between the two extreme sums (or differences)
      cannot depend on any undefined input bit. */
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select width-appropriate ops; only I32 and I64 are supported. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty, 
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty, 
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (for subtraction the extremes are min-minus-max and
      //  max-minus-min; the previous comment wrongly showed a '+')
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1881
1882
/* Accurate definedness propagation for Ctz32/Ctz64.  Per the
   'improver' trick below, V bits in positions above the lowest set
   bit of the argument are masked off before the final pessimising
   cast, rather than smearing all undefinedness into the result. */
static
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   IROp xorOp, subOp, andOp;
   IRExpr *one;
   IRAtom *improver, *improved;
   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   /* Pick the width-matched ops for the particular Ctz variant. */
   switch (czop) {
      case Iop_Ctz32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
         andOp = Iop_And64;
         one = mkU64(1);
         break;
      default:
         ppIROp(czop);
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // improver = atom ^ (atom - 1)
   //
   // That is, improver has its low ctz(atom) bits equal to one;
   // higher bits (if any) equal to zero.
   improver = assignNew('V', mce,ty,
                        binop(xorOp,
                              atom,
                              assignNew('V', mce, ty,
                                        binop(subOp, atom, one))));

   // improved = vatom & improver
   //
   // That is, treat any V bits above the first ctz(atom) bits as
   // "defined".
   improved = assignNew('V', mce, ty,
                        binop(andOp, vatom, improver));

   // Return pessimizing cast of improved.
   return mkPCastTo(mce, ty, improved);
}
1935
1936
sewardj95448072004-11-22 20:19:51 +00001937/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001938/*--- Scalar shifts. ---*/
1939/*------------------------------------------------------------*/
1940
1941/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1942 idea is to shift the definedness bits by the original shift amount.
1943 This introduces 0s ("defined") in new positions for left shifts and
1944 unsigned right shifts, and copies the top definedness bit for
1945 signed right shifts. So, conveniently, applying the original shift
1946 operator to the definedness bits for the left arg is exactly the
1947 right thing to do:
1948
1949 (qaa << bb)
1950
1951 However if the shift amount is undefined then the whole result
1952 is undefined. Hence need:
1953
1954 (qaa << bb) `UifU` PCast(qbb)
1955
1956 If the shift amount bb is a literal than qbb will say 'all defined'
1957 and the UifU and PCast will get folded out by post-instrumentation
1958 optimisation.
1959*/
1960static IRAtom* scalarShift ( MCEnv* mce,
1961 IRType ty,
1962 IROp original_op,
1963 IRAtom* qaa, IRAtom* qbb,
1964 IRAtom* aa, IRAtom* bb )
1965{
1966 tl_assert(isShadowAtom(mce,qaa));
1967 tl_assert(isShadowAtom(mce,qbb));
1968 tl_assert(isOriginalAtom(mce,aa));
1969 tl_assert(isOriginalAtom(mce,bb));
1970 tl_assert(sameKindedAtoms(qaa,aa));
1971 tl_assert(sameKindedAtoms(qbb,bb));
1972 return
1973 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001974 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001975 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001976 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001977 mkPCastTo(mce, ty, qbb)
1978 )
1979 );
1980}
1981
1982
1983/*------------------------------------------------------------*/
1984/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001985/*------------------------------------------------------------*/
1986
sewardja1d93302004-12-12 16:45:06 +00001987/* Vector pessimisation -- pessimise within each lane individually. */
1988
1989static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1990{
sewardj7cf4e6b2008-05-01 20:24:26 +00001991 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001992}
1993
1994static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1995{
sewardj7cf4e6b2008-05-01 20:24:26 +00001996 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001997}
1998
1999static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2000{
sewardj7cf4e6b2008-05-01 20:24:26 +00002001 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00002002}
2003
2004static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2005{
sewardj7cf4e6b2008-05-01 20:24:26 +00002006 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00002007}
2008
sewardj350e8f72012-06-25 07:52:15 +00002009static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2010{
2011 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2012}
2013
2014static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2015{
2016 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2017}
2018
sewardjacd2e912005-01-13 19:17:06 +00002019static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2020{
sewardj7cf4e6b2008-05-01 20:24:26 +00002021 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00002022}
2023
sewardja2f30952013-03-27 11:40:02 +00002024static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2025{
2026 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2027}
2028
sewardjacd2e912005-01-13 19:17:06 +00002029static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2030{
sewardj7cf4e6b2008-05-01 20:24:26 +00002031 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00002032}
2033
sewardja2f30952013-03-27 11:40:02 +00002034static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2035{
2036 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2037}
2038
sewardjacd2e912005-01-13 19:17:06 +00002039static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2040{
sewardj7cf4e6b2008-05-01 20:24:26 +00002041 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00002042}
2043
sewardjc678b852010-09-22 00:58:51 +00002044static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2045{
2046 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2047}
2048
2049static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2050{
2051 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2052}
2053
sewardja1d93302004-12-12 16:45:06 +00002054
sewardj3245c912004-12-10 14:58:26 +00002055/* Here's a simple scheme capable of handling ops derived from SSE1
2056 code and while only generating ops that can be efficiently
2057 implemented in SSE1. */
2058
2059/* All-lanes versions are straightforward:
2060
sewardj20d38f22005-02-07 23:50:18 +00002061 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00002062
   unary32Fx4(x) ==> PCast32x4(x#)
2064
2065 Lowest-lane-only versions are more complex:
2066
sewardj20d38f22005-02-07 23:50:18 +00002067 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002068 x#,
sewardj20d38f22005-02-07 23:50:18 +00002069 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00002070 )
2071
2072 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00002073 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00002074 obvious scheme of taking the bottom 32 bits of each operand
2075 and doing a 32-bit UifU. Basically since UifU is fast and
2076 chopping lanes off vector values is slow.
2077
2078 Finally:
2079
sewardj20d38f22005-02-07 23:50:18 +00002080 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002081 x#,
sewardj20d38f22005-02-07 23:50:18 +00002082 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00002083 )
2084
2085 Where:
2086
2087 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2088 PCast32x4(v#) = CmpNEZ32x4(v#)
2089*/
2090
2091static
2092IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2093{
2094 IRAtom* at;
2095 tl_assert(isShadowAtom(mce, vatomX));
2096 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002097 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002098 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002099 return at;
2100}
2101
2102static
2103IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2104{
2105 IRAtom* at;
2106 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002107 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002108 return at;
2109}
2110
2111static
2112IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2113{
2114 IRAtom* at;
2115 tl_assert(isShadowAtom(mce, vatomX));
2116 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002117 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002118 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002119 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002120 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002121 return at;
2122}
2123
2124static
2125IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2126{
2127 IRAtom* at;
2128 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002129 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002130 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002131 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002132 return at;
2133}
2134
sewardj0b070592004-12-10 21:44:22 +00002135/* --- ... and ... 64Fx2 versions of the same ... --- */
2136
2137static
2138IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2139{
2140 IRAtom* at;
2141 tl_assert(isShadowAtom(mce, vatomX));
2142 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002143 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002144 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002145 return at;
2146}
2147
2148static
2149IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2150{
2151 IRAtom* at;
2152 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002153 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002154 return at;
2155}
2156
2157static
2158IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2159{
2160 IRAtom* at;
2161 tl_assert(isShadowAtom(mce, vatomX));
2162 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002163 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002164 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002165 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002166 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002167 return at;
2168}
2169
2170static
2171IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2172{
2173 IRAtom* at;
2174 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002175 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002176 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002177 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002178 return at;
2179}
2180
sewardj57f92b02010-08-22 11:54:14 +00002181/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2182
2183static
2184IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2185{
2186 IRAtom* at;
2187 tl_assert(isShadowAtom(mce, vatomX));
2188 tl_assert(isShadowAtom(mce, vatomY));
2189 at = mkUifU64(mce, vatomX, vatomY);
2190 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2191 return at;
2192}
2193
2194static
2195IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2196{
2197 IRAtom* at;
2198 tl_assert(isShadowAtom(mce, vatomX));
2199 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2200 return at;
2201}
2202
sewardj350e8f72012-06-25 07:52:15 +00002203/* --- ... and ... 64Fx4 versions of the same ... --- */
2204
2205static
2206IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2207{
2208 IRAtom* at;
2209 tl_assert(isShadowAtom(mce, vatomX));
2210 tl_assert(isShadowAtom(mce, vatomY));
2211 at = mkUifUV256(mce, vatomX, vatomY);
2212 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2213 return at;
2214}
2215
2216static
2217IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2218{
2219 IRAtom* at;
2220 tl_assert(isShadowAtom(mce, vatomX));
2221 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2222 return at;
2223}
2224
2225/* --- ... and ... 32Fx8 versions of the same ... --- */
2226
2227static
2228IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2229{
2230 IRAtom* at;
2231 tl_assert(isShadowAtom(mce, vatomX));
2232 tl_assert(isShadowAtom(mce, vatomY));
2233 at = mkUifUV256(mce, vatomX, vatomY);
2234 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2235 return at;
2236}
2237
2238static
2239IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2240{
2241 IRAtom* at;
2242 tl_assert(isShadowAtom(mce, vatomX));
2243 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2244 return at;
2245}
2246
sewardja1d93302004-12-12 16:45:06 +00002247/* --- --- Vector saturated narrowing --- --- */
2248
sewardjb5a29232011-10-22 09:29:41 +00002249/* We used to do something very clever here, but on closer inspection
2250 (2011-Jun-15), and in particular bug #279698, it turns out to be
2251 wrong. Part of the problem came from the fact that for a long
2252 time, the IR primops to do with saturated narrowing were
2253 underspecified and managed to confuse multiple cases which needed
2254 to be separate: the op names had a signedness qualifier, but in
2255 fact the source and destination signednesses needed to be specified
2256 independently, so the op names really need two independent
2257 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002258
sewardjb5a29232011-10-22 09:29:41 +00002259 As of 2011-Jun-15 (ish) the underspecification was sorted out
2260 properly. The incorrect instrumentation remained, though. That
2261 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002262
sewardjb5a29232011-10-22 09:29:41 +00002263 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002264
sewardjb5a29232011-10-22 09:29:41 +00002265 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2266 number of lanes, X is the source lane width and signedness, and Y
2267 is the destination lane width and signedness. In all cases the
2268 destination lane width is half the source lane width, so the names
2269 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002270
sewardjb5a29232011-10-22 09:29:41 +00002271 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2272 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002273
sewardjb5a29232011-10-22 09:29:41 +00002274 Let Vanilla(OP) be a function that takes OP, one of these
2275 saturating narrowing ops, and produces the same "shaped" narrowing
2276 op which is not saturating, but merely dumps the most significant
2277 bits. "same shape" means that the lane numbers and widths are the
2278 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002279
sewardjb5a29232011-10-22 09:29:41 +00002280 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2281 = Iop_NarrowBin32to16x8,
2282 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2283 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002284
sewardjb5a29232011-10-22 09:29:41 +00002285 So, with that in place, the scheme is simple, and it is simple to
2286 pessimise each lane individually and then apply Vanilla(OP) so as
2287 to get the result in the right "shape". If the original OP is
2288 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002289
sewardjb5a29232011-10-22 09:29:41 +00002290 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002291
sewardjb5a29232011-10-22 09:29:41 +00002292 or for the case when OP is unary (Iop_QNarrowUn*)
2293
2294 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002295*/
/* Map a saturating narrowing op (QNarrowBin*/QNarrowUn*) to the
   non-saturating narrowing op with the same lane count and widths --
   the "Vanilla(OP)" function described in the comment block above.
   Panics on any op outside the supported set. */
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}
2333
/* Instrument a binary saturating narrow on V128 args: pessimise each
   source lane of both shadows (saturation makes every result bit
   depend on its whole source lane), then apply the same-shaped
   non-saturating narrow to get a V128 result shadow of the right
   shape. */
static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* Select the per-source-lane pessimiser matching the source lane
      width of narrow_op. */
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}
2357
sewardjacd2e912005-01-13 19:17:06 +00002358static
sewardj7ee7d852011-06-16 11:37:21 +00002359IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2360 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002361{
2362 IRAtom *at1, *at2, *at3;
2363 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2364 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002365 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2366 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2367 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2368 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002369 }
sewardjb5a29232011-10-22 09:29:41 +00002370 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002371 tl_assert(isShadowAtom(mce,vatom1));
2372 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002373 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2374 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002375 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002376 return at3;
2377}
2378
sewardj57f92b02010-08-22 11:54:14 +00002379static
sewardjb5a29232011-10-22 09:29:41 +00002380IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002381 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002382{
2383 IRAtom *at1, *at2;
2384 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002385 tl_assert(isShadowAtom(mce,vatom1));
2386 /* For vanilla narrowing (non-saturating), we can just apply
2387 the op directly to the V bits. */
2388 switch (narrow_op) {
2389 case Iop_NarrowUn16to8x8:
2390 case Iop_NarrowUn32to16x4:
2391 case Iop_NarrowUn64to32x2:
2392 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2393 return at1;
2394 default:
2395 break; /* Do Plan B */
2396 }
2397 /* Plan B: for ops that involve a saturation operation on the args,
2398 we must PCast before the vanilla narrow. */
2399 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002400 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2401 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2402 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2403 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2404 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2405 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2406 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2407 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2408 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2409 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002410 }
sewardjb5a29232011-10-22 09:29:41 +00002411 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002412 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002413 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002414 return at2;
2415}
2416
/* Instrument a unary widen from I64 to V128: apply the widening op
   directly to the shadow, then pessimise each *destination* lane so
   that an undefined source lane taints its whole widened lane. */
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* Select the pessimiser matching the destination lane width. */
   switch (longen_op) {
      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
2437
sewardja1d93302004-12-12 16:45:06 +00002438
2439/* --- --- Vector integer arithmetic --- --- */
2440
2441/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002442
sewardja2f30952013-03-27 11:40:02 +00002443/* --- V256-bit versions --- */
2444
2445static
2446IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2447{
2448 IRAtom* at;
2449 at = mkUifUV256(mce, vatom1, vatom2);
2450 at = mkPCast8x32(mce, at);
2451 return at;
2452}
2453
2454static
2455IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2456{
2457 IRAtom* at;
2458 at = mkUifUV256(mce, vatom1, vatom2);
2459 at = mkPCast16x16(mce, at);
2460 return at;
2461}
2462
2463static
2464IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2465{
2466 IRAtom* at;
2467 at = mkUifUV256(mce, vatom1, vatom2);
2468 at = mkPCast32x8(mce, at);
2469 return at;
2470}
2471
2472static
2473IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2474{
2475 IRAtom* at;
2476 at = mkUifUV256(mce, vatom1, vatom2);
2477 at = mkPCast64x4(mce, at);
2478 return at;
2479}
2480
sewardj20d38f22005-02-07 23:50:18 +00002481/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002482
sewardja1d93302004-12-12 16:45:06 +00002483static
2484IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2485{
2486 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002487 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002488 at = mkPCast8x16(mce, at);
2489 return at;
2490}
2491
2492static
2493IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2494{
2495 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002496 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002497 at = mkPCast16x8(mce, at);
2498 return at;
2499}
2500
2501static
2502IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2503{
2504 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002505 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002506 at = mkPCast32x4(mce, at);
2507 return at;
2508}
2509
2510static
2511IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2512{
2513 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002514 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002515 at = mkPCast64x2(mce, at);
2516 return at;
2517}
sewardj3245c912004-12-10 14:58:26 +00002518
sewardjacd2e912005-01-13 19:17:06 +00002519/* --- 64-bit versions --- */
2520
2521static
2522IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2523{
2524 IRAtom* at;
2525 at = mkUifU64(mce, vatom1, vatom2);
2526 at = mkPCast8x8(mce, at);
2527 return at;
2528}
2529
2530static
2531IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2532{
2533 IRAtom* at;
2534 at = mkUifU64(mce, vatom1, vatom2);
2535 at = mkPCast16x4(mce, at);
2536 return at;
2537}
2538
2539static
2540IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2541{
2542 IRAtom* at;
2543 at = mkUifU64(mce, vatom1, vatom2);
2544 at = mkPCast32x2(mce, at);
2545 return at;
2546}
2547
sewardj57f92b02010-08-22 11:54:14 +00002548static
2549IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2550{
2551 IRAtom* at;
2552 at = mkUifU64(mce, vatom1, vatom2);
2553 at = mkPCastTo(mce, Ity_I64, at);
2554 return at;
2555}
2556
sewardjc678b852010-09-22 00:58:51 +00002557/* --- 32-bit versions --- */
2558
2559static
2560IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2561{
2562 IRAtom* at;
2563 at = mkUifU32(mce, vatom1, vatom2);
2564 at = mkPCast8x4(mce, at);
2565 return at;
2566}
2567
2568static
2569IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2570{
2571 IRAtom* at;
2572 at = mkUifU32(mce, vatom1, vatom2);
2573 at = mkPCast16x2(mce, at);
2574 return at;
2575}
2576
sewardj3245c912004-12-10 14:58:26 +00002577
2578/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002579/*--- Generate shadow values from all kinds of IRExprs. ---*/
2580/*------------------------------------------------------------*/
2581
/* Compute the shadow (V-bits) expression for a 4-ary (Qop) IR
   expression.  The four argument shadows are computed first; the
   result shadow is then built per-op.  The "A x B x ... -> R"
   comments give the operand/result types of the *original* op; the
   Ity_* passed to mkLazy4 is the shadow type of R. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering: just steer the shadows the same way. */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2628
2629
/* Compute the shadow (V-bits) expression for a 3-ary (Triop) IR
   expression.  The three argument shadows are computed first; the
   result shadow is then built per-op.  The "A x B x C -> R" comments
   give the operand/result types of the *original* op; the Ity_*
   passed to mkLazy3 is the shadow type of R.  Ops whose second or
   third argument is an index/shift amount require that argument to be
   fully defined (complainIfUndefined), since an undefined
   index/amount cannot be tracked lazily. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 (shadow type is I32) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) must be defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* The extraction index (atom3) must be defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* The lane index (atom2) must be defined. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2713
2714
2715static
sewardj95448072004-11-22 20:19:51 +00002716IRAtom* expr2vbits_Binop ( MCEnv* mce,
2717 IROp op,
2718 IRAtom* atom1, IRAtom* atom2 )
2719{
2720 IRType and_or_ty;
2721 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2722 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2723 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2724
2725 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2726 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2727
2728 tl_assert(isOriginalAtom(mce,atom1));
2729 tl_assert(isOriginalAtom(mce,atom2));
2730 tl_assert(isShadowAtom(mce,vatom1));
2731 tl_assert(isShadowAtom(mce,vatom2));
2732 tl_assert(sameKindedAtoms(atom1,vatom1));
2733 tl_assert(sameKindedAtoms(atom2,vatom2));
2734 switch (op) {
2735
sewardjc678b852010-09-22 00:58:51 +00002736 /* 32-bit SIMD */
2737
2738 case Iop_Add16x2:
2739 case Iop_HAdd16Ux2:
2740 case Iop_HAdd16Sx2:
2741 case Iop_Sub16x2:
2742 case Iop_HSub16Ux2:
2743 case Iop_HSub16Sx2:
2744 case Iop_QAdd16Sx2:
2745 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002746 case Iop_QSub16Ux2:
sewardj7a370652013-07-04 20:37:33 +00002747 case Iop_QAdd16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002748 return binary16Ix2(mce, vatom1, vatom2);
2749
2750 case Iop_Add8x4:
2751 case Iop_HAdd8Ux4:
2752 case Iop_HAdd8Sx4:
2753 case Iop_Sub8x4:
2754 case Iop_HSub8Ux4:
2755 case Iop_HSub8Sx4:
2756 case Iop_QSub8Ux4:
2757 case Iop_QAdd8Ux4:
2758 case Iop_QSub8Sx4:
2759 case Iop_QAdd8Sx4:
2760 return binary8Ix4(mce, vatom1, vatom2);
2761
sewardjacd2e912005-01-13 19:17:06 +00002762 /* 64-bit SIMD */
2763
sewardj57f92b02010-08-22 11:54:14 +00002764 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002765 case Iop_ShrN16x4:
2766 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002767 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002768 case Iop_SarN16x4:
2769 case Iop_SarN32x2:
2770 case Iop_ShlN16x4:
2771 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002772 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002773 /* Same scheme as with all other shifts. */
sewardjb9e6d242013-05-11 13:42:08 +00002774 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002775 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002776
sewardj7ee7d852011-06-16 11:37:21 +00002777 case Iop_QNarrowBin32Sto16Sx4:
2778 case Iop_QNarrowBin16Sto8Sx8:
2779 case Iop_QNarrowBin16Sto8Ux8:
2780 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002781
2782 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002783 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002784 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002785 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002786 case Iop_Avg8Ux8:
2787 case Iop_QSub8Sx8:
2788 case Iop_QSub8Ux8:
2789 case Iop_Sub8x8:
2790 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002791 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002792 case Iop_CmpEQ8x8:
2793 case Iop_QAdd8Sx8:
2794 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002795 case Iop_QSal8x8:
2796 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002797 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002798 case Iop_Mul8x8:
2799 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002800 return binary8Ix8(mce, vatom1, vatom2);
2801
2802 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002803 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002804 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002805 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002806 case Iop_Avg16Ux4:
2807 case Iop_QSub16Ux4:
2808 case Iop_QSub16Sx4:
2809 case Iop_Sub16x4:
2810 case Iop_Mul16x4:
2811 case Iop_MulHi16Sx4:
2812 case Iop_MulHi16Ux4:
2813 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002814 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002815 case Iop_CmpEQ16x4:
2816 case Iop_QAdd16Sx4:
2817 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002818 case Iop_QSal16x4:
2819 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002820 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002821 case Iop_QDMulHi16Sx4:
2822 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002823 return binary16Ix4(mce, vatom1, vatom2);
2824
2825 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002826 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002827 case Iop_Max32Sx2:
2828 case Iop_Max32Ux2:
2829 case Iop_Min32Sx2:
2830 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002831 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002832 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002833 case Iop_CmpEQ32x2:
2834 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002835 case Iop_QAdd32Ux2:
2836 case Iop_QAdd32Sx2:
2837 case Iop_QSub32Ux2:
2838 case Iop_QSub32Sx2:
2839 case Iop_QSal32x2:
2840 case Iop_QShl32x2:
2841 case Iop_QDMulHi32Sx2:
2842 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002843 return binary32Ix2(mce, vatom1, vatom2);
2844
sewardj57f92b02010-08-22 11:54:14 +00002845 case Iop_QSub64Ux1:
2846 case Iop_QSub64Sx1:
2847 case Iop_QAdd64Ux1:
2848 case Iop_QAdd64Sx1:
2849 case Iop_QSal64x1:
2850 case Iop_QShl64x1:
2851 case Iop_Sal64x1:
2852 return binary64Ix1(mce, vatom1, vatom2);
2853
2854 case Iop_QShlN8Sx8:
2855 case Iop_QShlN8x8:
2856 case Iop_QSalN8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002857 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002858 return mkPCast8x8(mce, vatom1);
2859
2860 case Iop_QShlN16Sx4:
2861 case Iop_QShlN16x4:
2862 case Iop_QSalN16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002863 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002864 return mkPCast16x4(mce, vatom1);
2865
2866 case Iop_QShlN32Sx2:
2867 case Iop_QShlN32x2:
2868 case Iop_QSalN32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002869 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002870 return mkPCast32x2(mce, vatom1);
2871
2872 case Iop_QShlN64Sx1:
2873 case Iop_QShlN64x1:
2874 case Iop_QSalN64x1:
sewardjb9e6d242013-05-11 13:42:08 +00002875 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002876 return mkPCast32x2(mce, vatom1);
2877
2878 case Iop_PwMax32Sx2:
2879 case Iop_PwMax32Ux2:
2880 case Iop_PwMin32Sx2:
2881 case Iop_PwMin32Ux2:
2882 case Iop_PwMax32Fx2:
2883 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002884 return assignNew('V', mce, Ity_I64,
2885 binop(Iop_PwMax32Ux2,
2886 mkPCast32x2(mce, vatom1),
2887 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002888
2889 case Iop_PwMax16Sx4:
2890 case Iop_PwMax16Ux4:
2891 case Iop_PwMin16Sx4:
2892 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00002893 return assignNew('V', mce, Ity_I64,
2894 binop(Iop_PwMax16Ux4,
2895 mkPCast16x4(mce, vatom1),
2896 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002897
2898 case Iop_PwMax8Sx8:
2899 case Iop_PwMax8Ux8:
2900 case Iop_PwMin8Sx8:
2901 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00002902 return assignNew('V', mce, Ity_I64,
2903 binop(Iop_PwMax8Ux8,
2904 mkPCast8x8(mce, vatom1),
2905 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002906
2907 case Iop_PwAdd32x2:
2908 case Iop_PwAdd32Fx2:
2909 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00002910 assignNew('V', mce, Ity_I64,
2911 binop(Iop_PwAdd32x2,
2912 mkPCast32x2(mce, vatom1),
2913 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002914
2915 case Iop_PwAdd16x4:
2916 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00002917 assignNew('V', mce, Ity_I64,
2918 binop(op, mkPCast16x4(mce, vatom1),
2919 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002920
2921 case Iop_PwAdd8x8:
2922 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00002923 assignNew('V', mce, Ity_I64,
2924 binop(op, mkPCast8x8(mce, vatom1),
2925 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002926
2927 case Iop_Shl8x8:
2928 case Iop_Shr8x8:
2929 case Iop_Sar8x8:
2930 case Iop_Sal8x8:
2931 return mkUifU64(mce,
2932 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2933 mkPCast8x8(mce,vatom2)
2934 );
2935
2936 case Iop_Shl16x4:
2937 case Iop_Shr16x4:
2938 case Iop_Sar16x4:
2939 case Iop_Sal16x4:
2940 return mkUifU64(mce,
2941 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2942 mkPCast16x4(mce,vatom2)
2943 );
2944
2945 case Iop_Shl32x2:
2946 case Iop_Shr32x2:
2947 case Iop_Sar32x2:
2948 case Iop_Sal32x2:
2949 return mkUifU64(mce,
2950 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2951 mkPCast32x2(mce,vatom2)
2952 );
2953
sewardjacd2e912005-01-13 19:17:06 +00002954 /* 64-bit data-steering */
2955 case Iop_InterleaveLO32x2:
2956 case Iop_InterleaveLO16x4:
2957 case Iop_InterleaveLO8x8:
2958 case Iop_InterleaveHI32x2:
2959 case Iop_InterleaveHI16x4:
2960 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002961 case Iop_CatOddLanes8x8:
2962 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002963 case Iop_CatOddLanes16x4:
2964 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002965 case Iop_InterleaveOddLanes8x8:
2966 case Iop_InterleaveEvenLanes8x8:
2967 case Iop_InterleaveOddLanes16x4:
2968 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002969 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002970
sewardj57f92b02010-08-22 11:54:14 +00002971 case Iop_GetElem8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002972 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002973 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2974 case Iop_GetElem16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002975 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002976 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2977 case Iop_GetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002978 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002979 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2980
sewardj114a9172008-02-09 01:49:32 +00002981 /* Perm8x8: rearrange values in left arg using steering values
2982 from right arg. So rearrange the vbits in the same way but
2983 pessimise wrt steering values. */
2984 case Iop_Perm8x8:
2985 return mkUifU64(
2986 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002987 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002988 mkPCast8x8(mce, vatom2)
2989 );
2990
sewardj20d38f22005-02-07 23:50:18 +00002991 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002992
sewardj57f92b02010-08-22 11:54:14 +00002993 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002994 case Iop_ShrN16x8:
2995 case Iop_ShrN32x4:
2996 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002997 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002998 case Iop_SarN16x8:
2999 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00003000 case Iop_SarN64x2:
3001 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00003002 case Iop_ShlN16x8:
3003 case Iop_ShlN32x4:
3004 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00003005 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3006 this is wrong now, scalar shifts are done properly lazily.
3007 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003008 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003009 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00003010
sewardjcbf8be72005-11-10 18:34:41 +00003011 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00003012 case Iop_Shl8x16:
3013 case Iop_Shr8x16:
3014 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00003015 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00003016 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00003017 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003018 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003019 mkPCast8x16(mce,vatom2)
3020 );
3021
3022 case Iop_Shl16x8:
3023 case Iop_Shr16x8:
3024 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00003025 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00003026 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00003027 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003028 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003029 mkPCast16x8(mce,vatom2)
3030 );
3031
3032 case Iop_Shl32x4:
3033 case Iop_Shr32x4:
3034 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00003035 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00003036 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00003037 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003038 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003039 mkPCast32x4(mce,vatom2)
3040 );
3041
sewardj57f92b02010-08-22 11:54:14 +00003042 case Iop_Shl64x2:
3043 case Iop_Shr64x2:
3044 case Iop_Sar64x2:
3045 case Iop_Sal64x2:
3046 return mkUifUV128(mce,
3047 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3048 mkPCast64x2(mce,vatom2)
3049 );
3050
3051 case Iop_F32ToFixed32Ux4_RZ:
3052 case Iop_F32ToFixed32Sx4_RZ:
3053 case Iop_Fixed32UToF32x4_RN:
3054 case Iop_Fixed32SToF32x4_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003055 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003056 return mkPCast32x4(mce, vatom1);
3057
3058 case Iop_F32ToFixed32Ux2_RZ:
3059 case Iop_F32ToFixed32Sx2_RZ:
3060 case Iop_Fixed32UToF32x2_RN:
3061 case Iop_Fixed32SToF32x2_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003062 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003063 return mkPCast32x2(mce, vatom1);
3064
sewardja1d93302004-12-12 16:45:06 +00003065 case Iop_QSub8Ux16:
3066 case Iop_QSub8Sx16:
3067 case Iop_Sub8x16:
3068 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003069 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003070 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003071 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003072 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00003073 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00003074 case Iop_CmpEQ8x16:
3075 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003076 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003077 case Iop_QAdd8Ux16:
3078 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00003079 case Iop_QSal8x16:
3080 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00003081 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00003082 case Iop_Mul8x16:
3083 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00003084 return binary8Ix16(mce, vatom1, vatom2);
3085
3086 case Iop_QSub16Ux8:
3087 case Iop_QSub16Sx8:
3088 case Iop_Sub16x8:
3089 case Iop_Mul16x8:
3090 case Iop_MulHi16Sx8:
3091 case Iop_MulHi16Ux8:
3092 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003093 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003094 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003095 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003096 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003097 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003098 case Iop_CmpEQ16x8:
3099 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003100 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003101 case Iop_QAdd16Ux8:
3102 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00003103 case Iop_QSal16x8:
3104 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00003105 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00003106 case Iop_QDMulHi16Sx8:
3107 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003108 return binary16Ix8(mce, vatom1, vatom2);
3109
3110 case Iop_Sub32x4:
3111 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00003112 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00003113 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00003114 case Iop_QAdd32Sx4:
3115 case Iop_QAdd32Ux4:
3116 case Iop_QSub32Sx4:
3117 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00003118 case Iop_QSal32x4:
3119 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00003120 case Iop_Avg32Ux4:
3121 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003122 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00003123 case Iop_Max32Ux4:
3124 case Iop_Max32Sx4:
3125 case Iop_Min32Ux4:
3126 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003127 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003128 case Iop_QDMulHi32Sx4:
3129 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003130 return binary32Ix4(mce, vatom1, vatom2);
3131
3132 case Iop_Sub64x2:
3133 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00003134 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003135 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00003136 case Iop_QSal64x2:
3137 case Iop_QShl64x2:
3138 case Iop_QAdd64Ux2:
3139 case Iop_QAdd64Sx2:
3140 case Iop_QSub64Ux2:
3141 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00003142 return binary64Ix2(mce, vatom1, vatom2);
3143
sewardj7ee7d852011-06-16 11:37:21 +00003144 case Iop_QNarrowBin32Sto16Sx8:
3145 case Iop_QNarrowBin32Uto16Ux8:
3146 case Iop_QNarrowBin32Sto16Ux8:
3147 case Iop_QNarrowBin16Sto8Sx16:
3148 case Iop_QNarrowBin16Uto8Ux16:
3149 case Iop_QNarrowBin16Sto8Ux16:
3150 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003151
sewardj0b070592004-12-10 21:44:22 +00003152 case Iop_Sub64Fx2:
3153 case Iop_Mul64Fx2:
3154 case Iop_Min64Fx2:
3155 case Iop_Max64Fx2:
3156 case Iop_Div64Fx2:
3157 case Iop_CmpLT64Fx2:
3158 case Iop_CmpLE64Fx2:
3159 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003160 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003161 case Iop_Add64Fx2:
3162 return binary64Fx2(mce, vatom1, vatom2);
3163
3164 case Iop_Sub64F0x2:
3165 case Iop_Mul64F0x2:
3166 case Iop_Min64F0x2:
3167 case Iop_Max64F0x2:
3168 case Iop_Div64F0x2:
3169 case Iop_CmpLT64F0x2:
3170 case Iop_CmpLE64F0x2:
3171 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003172 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003173 case Iop_Add64F0x2:
3174 return binary64F0x2(mce, vatom1, vatom2);
3175
sewardj170ee212004-12-10 18:57:51 +00003176 case Iop_Sub32Fx4:
3177 case Iop_Mul32Fx4:
3178 case Iop_Min32Fx4:
3179 case Iop_Max32Fx4:
3180 case Iop_Div32Fx4:
3181 case Iop_CmpLT32Fx4:
3182 case Iop_CmpLE32Fx4:
3183 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003184 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003185 case Iop_CmpGT32Fx4:
3186 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003187 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00003188 case Iop_Recps32Fx4:
3189 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003190 return binary32Fx4(mce, vatom1, vatom2);
3191
sewardj57f92b02010-08-22 11:54:14 +00003192 case Iop_Sub32Fx2:
3193 case Iop_Mul32Fx2:
3194 case Iop_Min32Fx2:
3195 case Iop_Max32Fx2:
3196 case Iop_CmpEQ32Fx2:
3197 case Iop_CmpGT32Fx2:
3198 case Iop_CmpGE32Fx2:
3199 case Iop_Add32Fx2:
3200 case Iop_Recps32Fx2:
3201 case Iop_Rsqrts32Fx2:
3202 return binary32Fx2(mce, vatom1, vatom2);
3203
sewardj170ee212004-12-10 18:57:51 +00003204 case Iop_Sub32F0x4:
3205 case Iop_Mul32F0x4:
3206 case Iop_Min32F0x4:
3207 case Iop_Max32F0x4:
3208 case Iop_Div32F0x4:
3209 case Iop_CmpLT32F0x4:
3210 case Iop_CmpLE32F0x4:
3211 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003212 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003213 case Iop_Add32F0x4:
3214 return binary32F0x4(mce, vatom1, vatom2);
3215
sewardj57f92b02010-08-22 11:54:14 +00003216 case Iop_QShlN8Sx16:
3217 case Iop_QShlN8x16:
3218 case Iop_QSalN8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003219 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003220 return mkPCast8x16(mce, vatom1);
3221
3222 case Iop_QShlN16Sx8:
3223 case Iop_QShlN16x8:
3224 case Iop_QSalN16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003225 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003226 return mkPCast16x8(mce, vatom1);
3227
3228 case Iop_QShlN32Sx4:
3229 case Iop_QShlN32x4:
3230 case Iop_QSalN32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003231 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003232 return mkPCast32x4(mce, vatom1);
3233
3234 case Iop_QShlN64Sx2:
3235 case Iop_QShlN64x2:
3236 case Iop_QSalN64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003237 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003238 return mkPCast32x4(mce, vatom1);
3239
3240 case Iop_Mull32Sx2:
3241 case Iop_Mull32Ux2:
3242 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003243 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3244 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003245
3246 case Iop_Mull16Sx4:
3247 case Iop_Mull16Ux4:
3248 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003249 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3250 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003251
3252 case Iop_Mull8Sx8:
3253 case Iop_Mull8Ux8:
3254 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003255 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3256 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003257
3258 case Iop_PwAdd32x4:
3259 return mkPCast32x4(mce,
3260 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3261 mkPCast32x4(mce, vatom2))));
3262
3263 case Iop_PwAdd16x8:
3264 return mkPCast16x8(mce,
3265 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3266 mkPCast16x8(mce, vatom2))));
3267
3268 case Iop_PwAdd8x16:
3269 return mkPCast8x16(mce,
3270 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3271 mkPCast8x16(mce, vatom2))));
3272
sewardj20d38f22005-02-07 23:50:18 +00003273 /* V128-bit data-steering */
3274 case Iop_SetV128lo32:
3275 case Iop_SetV128lo64:
3276 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003277 case Iop_InterleaveLO64x2:
3278 case Iop_InterleaveLO32x4:
3279 case Iop_InterleaveLO16x8:
3280 case Iop_InterleaveLO8x16:
3281 case Iop_InterleaveHI64x2:
3282 case Iop_InterleaveHI32x4:
3283 case Iop_InterleaveHI16x8:
3284 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003285 case Iop_CatOddLanes8x16:
3286 case Iop_CatOddLanes16x8:
3287 case Iop_CatOddLanes32x4:
3288 case Iop_CatEvenLanes8x16:
3289 case Iop_CatEvenLanes16x8:
3290 case Iop_CatEvenLanes32x4:
3291 case Iop_InterleaveOddLanes8x16:
3292 case Iop_InterleaveOddLanes16x8:
3293 case Iop_InterleaveOddLanes32x4:
3294 case Iop_InterleaveEvenLanes8x16:
3295 case Iop_InterleaveEvenLanes16x8:
3296 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003297 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003298
3299 case Iop_GetElem8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003300 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003301 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3302 case Iop_GetElem16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003303 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003304 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3305 case Iop_GetElem32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003306 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003307 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3308 case Iop_GetElem64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003309 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003310 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3311
sewardj620eb5b2005-10-22 12:50:43 +00003312 /* Perm8x16: rearrange values in left arg using steering values
3313 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003314 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003315 case Iop_Perm8x16:
3316 return mkUifUV128(
3317 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003318 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003319 mkPCast8x16(mce, vatom2)
3320 );
sewardj350e8f72012-06-25 07:52:15 +00003321 case Iop_Perm32x4:
3322 return mkUifUV128(
3323 mce,
3324 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3325 mkPCast32x4(mce, vatom2)
3326 );
sewardj170ee212004-12-10 18:57:51 +00003327
sewardj43d60752005-11-10 18:13:01 +00003328 /* These two take the lower half of each 16-bit lane, sign/zero
3329 extend it to 32, and multiply together, producing a 32x4
3330 result (and implicitly ignoring half the operand bits). So
3331 treat it as a bunch of independent 16x8 operations, but then
3332 do 32-bit shifts left-right to copy the lower half results
3333 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3334 into the upper half of each result lane. */
3335 case Iop_MullEven16Ux8:
3336 case Iop_MullEven16Sx8: {
3337 IRAtom* at;
3338 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003339 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3340 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003341 return at;
3342 }
3343
3344 /* Same deal as Iop_MullEven16{S,U}x8 */
3345 case Iop_MullEven8Ux16:
3346 case Iop_MullEven8Sx16: {
3347 IRAtom* at;
3348 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003349 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3350 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003351 return at;
3352 }
3353
3354 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3355 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3356 Simply apply same op to the V bits, since this really no more
3357 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003358 case Iop_NarrowBin32to16x8:
3359 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003360 return assignNew('V', mce, Ity_V128,
3361 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003362
3363 case Iop_ShrV128:
3364 case Iop_ShlV128:
3365 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3366 this is wrong now, scalar shifts are done properly lazily.
3367 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003368 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003369 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003370
sewardj69a13322005-04-23 01:14:51 +00003371 /* I128-bit data-steering */
3372 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003373 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003374
sewardj350e8f72012-06-25 07:52:15 +00003375 /* V256-bit SIMD */
3376
3377 case Iop_Add64Fx4:
3378 case Iop_Sub64Fx4:
3379 case Iop_Mul64Fx4:
3380 case Iop_Div64Fx4:
3381 case Iop_Max64Fx4:
3382 case Iop_Min64Fx4:
3383 return binary64Fx4(mce, vatom1, vatom2);
3384
3385 case Iop_Add32Fx8:
3386 case Iop_Sub32Fx8:
3387 case Iop_Mul32Fx8:
3388 case Iop_Div32Fx8:
3389 case Iop_Max32Fx8:
3390 case Iop_Min32Fx8:
3391 return binary32Fx8(mce, vatom1, vatom2);
3392
3393 /* V256-bit data-steering */
3394 case Iop_V128HLtoV256:
3395 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3396
sewardj3245c912004-12-10 14:58:26 +00003397 /* Scalar floating point */
3398
sewardjb5b87402011-03-07 16:05:35 +00003399 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003400 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003401 /* I32(rm) x F32 -> I64 */
3402 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3403
3404 case Iop_I64StoF32:
3405 /* I32(rm) x I64 -> F32 */
3406 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3407
sewardjed69fdb2006-02-03 16:12:27 +00003408 case Iop_RoundF64toInt:
3409 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003410 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003411 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003412 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003413 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003414 case Iop_SinF64:
3415 case Iop_CosF64:
3416 case Iop_TanF64:
3417 case Iop_2xm1F64:
3418 case Iop_SqrtF64:
3419 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003420 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3421
sewardjea8b02f2012-04-12 17:28:57 +00003422 case Iop_ShlD64:
3423 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003424 case Iop_RoundD64toInt:
florian054684f2013-06-06 21:21:46 +00003425 /* I32(rm) x D64 -> D64 */
sewardjea8b02f2012-04-12 17:28:57 +00003426 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3427
3428 case Iop_ShlD128:
3429 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003430 case Iop_RoundD128toInt:
florian054684f2013-06-06 21:21:46 +00003431 /* I32(rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003432 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3433
3434 case Iop_D64toI64S:
florian53eb2a02013-01-12 22:04:00 +00003435 case Iop_D64toI64U:
sewardjea8b02f2012-04-12 17:28:57 +00003436 case Iop_I64StoD64:
florian53eb2a02013-01-12 22:04:00 +00003437 case Iop_I64UtoD64:
florian054684f2013-06-06 21:21:46 +00003438 /* I32(rm) x I64/D64 -> D64/I64 */
sewardjea8b02f2012-04-12 17:28:57 +00003439 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3440
florianba5693c2013-06-17 19:04:24 +00003441 case Iop_F32toD32:
3442 case Iop_F64toD32:
3443 case Iop_F128toD32:
3444 case Iop_D32toF32:
3445 case Iop_D64toF32:
3446 case Iop_D128toF32:
3447 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3448 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3449
3450 case Iop_F32toD64:
florian39b08d82013-05-05 15:05:42 +00003451 case Iop_F64toD64:
florianba5693c2013-06-17 19:04:24 +00003452 case Iop_F128toD64:
3453 case Iop_D32toF64:
florian39b08d82013-05-05 15:05:42 +00003454 case Iop_D64toF64:
florian39b08d82013-05-05 15:05:42 +00003455 case Iop_D128toF64:
florianba5693c2013-06-17 19:04:24 +00003456 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
florian39b08d82013-05-05 15:05:42 +00003457 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3458
florianba5693c2013-06-17 19:04:24 +00003459 case Iop_F32toD128:
3460 case Iop_F64toD128:
florian39b08d82013-05-05 15:05:42 +00003461 case Iop_F128toD128:
florianba5693c2013-06-17 19:04:24 +00003462 case Iop_D32toF128:
3463 case Iop_D64toF128:
florian39b08d82013-05-05 15:05:42 +00003464 case Iop_D128toF128:
florianba5693c2013-06-17 19:04:24 +00003465 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
florian39b08d82013-05-05 15:05:42 +00003466 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3467
sewardjd376a762010-06-27 09:08:54 +00003468 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003469 case Iop_SqrtF32:
3470 /* I32(rm) x I32/F32 -> I32/F32 */
3471 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3472
sewardjb5b87402011-03-07 16:05:35 +00003473 case Iop_SqrtF128:
3474 /* I32(rm) x F128 -> F128 */
3475 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3476
3477 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003478 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003479 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003480 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003481 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3482 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3483
3484 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003485 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003486 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
florian733b4db2013-06-06 19:13:29 +00003487 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3488 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003489 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3490
3491 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003492 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003493 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
florian733b4db2013-06-06 19:13:29 +00003494 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3495 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3496 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003497 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3498
3499 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003500 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003501 return assignNew('V', mce, Ity_I128,
3502 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003503
sewardj59570ff2010-01-01 11:59:33 +00003504 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003505 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003506 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003507 case Iop_I64UtoF32:
florian53eb2a02013-01-12 22:04:00 +00003508 case Iop_D64toI32U:
3509 case Iop_D64toI32S:
3510 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
sewardj95448072004-11-22 20:19:51 +00003511 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3512
sewardjea8b02f2012-04-12 17:28:57 +00003513 case Iop_D64toD32:
florian054684f2013-06-06 21:21:46 +00003514 /* First arg is I32 (rounding mode), second is D64 (data). */
florianf4bed372012-12-21 04:25:10 +00003515 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
sewardjea8b02f2012-04-12 17:28:57 +00003516
sewardj06f96d02009-12-31 19:24:12 +00003517 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003518 /* First arg is I32 (rounding mode), second is F64 (data). */
3519 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3520
sewardj18c72fa2012-04-23 11:22:05 +00003521 case Iop_InsertExpD64:
3522 /* I64 x I64 -> D64 */
3523 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3524
3525 case Iop_InsertExpD128:
3526 /* I64 x I128 -> D128 */
3527 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3528
sewardjb5b87402011-03-07 16:05:35 +00003529 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003530 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003531 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003532 case Iop_CmpD64:
3533 case Iop_CmpD128:
florian29a36b92012-12-26 17:48:46 +00003534 case Iop_CmpExpD64:
3535 case Iop_CmpExpD128:
sewardj95448072004-11-22 20:19:51 +00003536 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3537
3538 /* non-FP after here */
3539
3540 case Iop_DivModU64to32:
3541 case Iop_DivModS64to32:
3542 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3543
sewardj69a13322005-04-23 01:14:51 +00003544 case Iop_DivModU128to64:
3545 case Iop_DivModS128to64:
3546 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3547
florian537ed2d2012-08-20 16:51:39 +00003548 case Iop_8HLto16:
3549 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003550 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003551 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003552 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003553 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003554
sewardjb5b87402011-03-07 16:05:35 +00003555 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003556 case Iop_MullS64:
3557 case Iop_MullU64: {
3558 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3559 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003560 return assignNew('V', mce, Ity_I128,
3561 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003562 }
3563
sewardj95448072004-11-22 20:19:51 +00003564 case Iop_MullS32:
3565 case Iop_MullU32: {
3566 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3567 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003568 return assignNew('V', mce, Ity_I64,
3569 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003570 }
3571
3572 case Iop_MullS16:
3573 case Iop_MullU16: {
3574 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3575 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003576 return assignNew('V', mce, Ity_I32,
3577 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003578 }
3579
3580 case Iop_MullS8:
3581 case Iop_MullU8: {
3582 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3583 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003584 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003585 }
3586
sewardj5af05062010-10-18 16:31:14 +00003587 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003588 case Iop_DivS32:
3589 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003590 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003591 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003592 case Iop_QAdd32S: /* could probably do better */
3593 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003594 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3595
sewardjb00944a2005-12-23 12:47:16 +00003596 case Iop_DivS64:
3597 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003598 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003599 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003600 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3601
sewardj95448072004-11-22 20:19:51 +00003602 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003603 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003604 return expensiveAddSub(mce,True,Ity_I32,
3605 vatom1,vatom2, atom1,atom2);
3606 else
3607 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003608 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003609 if (mce->bogusLiterals)
3610 return expensiveAddSub(mce,False,Ity_I32,
3611 vatom1,vatom2, atom1,atom2);
3612 else
3613 goto cheap_AddSub32;
3614
3615 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003616 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003617 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3618
sewardj463b3d92005-07-18 11:41:15 +00003619 case Iop_CmpORD32S:
3620 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003621 case Iop_CmpORD64S:
3622 case Iop_CmpORD64U:
3623 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003624
sewardj681be302005-01-15 20:43:58 +00003625 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003626 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003627 return expensiveAddSub(mce,True,Ity_I64,
3628 vatom1,vatom2, atom1,atom2);
3629 else
3630 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003631 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003632 if (mce->bogusLiterals)
3633 return expensiveAddSub(mce,False,Ity_I64,
3634 vatom1,vatom2, atom1,atom2);
3635 else
3636 goto cheap_AddSub64;
3637
3638 cheap_AddSub64:
3639 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003640 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3641
sewardj95448072004-11-22 20:19:51 +00003642 case Iop_Mul16:
3643 case Iop_Add16:
3644 case Iop_Sub16:
3645 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3646
florian537ed2d2012-08-20 16:51:39 +00003647 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003648 case Iop_Sub8:
3649 case Iop_Add8:
3650 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3651
sewardj69a13322005-04-23 01:14:51 +00003652 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003653 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003654 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003655 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003656 else
3657 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003658
3659 expensive_cmp64:
3660 case Iop_ExpCmpNE64:
3661 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3662
sewardj69a13322005-04-23 01:14:51 +00003663 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003664 case Iop_CmpLE64S: case Iop_CmpLE64U:
3665 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003666 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3667
sewardjd5204dc2004-12-31 01:16:11 +00003668 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003669 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003670 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003671 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003672 else
3673 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003674
3675 expensive_cmp32:
3676 case Iop_ExpCmpNE32:
3677 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3678
sewardjd5204dc2004-12-31 01:16:11 +00003679 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003680 case Iop_CmpLE32S: case Iop_CmpLE32U:
3681 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003682 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3683
3684 case Iop_CmpEQ16: case Iop_CmpNE16:
3685 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3686
sewardj4cfa81b2012-11-08 10:58:16 +00003687 case Iop_ExpCmpNE16:
3688 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3689
sewardj95448072004-11-22 20:19:51 +00003690 case Iop_CmpEQ8: case Iop_CmpNE8:
3691 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3692
sewardjafed4c52009-07-12 13:00:17 +00003693 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3694 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3695 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3696 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3697 /* Just say these all produce a defined result, regardless
3698 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3699 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3700
sewardjaaddbc22005-10-07 09:49:53 +00003701 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3702 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3703
sewardj95448072004-11-22 20:19:51 +00003704 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003705 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003706
sewardjdb67f5f2004-12-14 01:15:31 +00003707 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003708 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003709
florian537ed2d2012-08-20 16:51:39 +00003710 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003711 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003712
sewardj350e8f72012-06-25 07:52:15 +00003713 case Iop_AndV256:
3714 uifu = mkUifUV256; difd = mkDifDV256;
3715 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003716 case Iop_AndV128:
3717 uifu = mkUifUV128; difd = mkDifDV128;
3718 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003719 case Iop_And64:
3720 uifu = mkUifU64; difd = mkDifD64;
3721 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003722 case Iop_And32:
3723 uifu = mkUifU32; difd = mkDifD32;
3724 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3725 case Iop_And16:
3726 uifu = mkUifU16; difd = mkDifD16;
3727 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3728 case Iop_And8:
3729 uifu = mkUifU8; difd = mkDifD8;
3730 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3731
sewardj350e8f72012-06-25 07:52:15 +00003732 case Iop_OrV256:
3733 uifu = mkUifUV256; difd = mkDifDV256;
3734 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003735 case Iop_OrV128:
3736 uifu = mkUifUV128; difd = mkDifDV128;
3737 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003738 case Iop_Or64:
3739 uifu = mkUifU64; difd = mkDifD64;
3740 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003741 case Iop_Or32:
3742 uifu = mkUifU32; difd = mkDifD32;
3743 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3744 case Iop_Or16:
3745 uifu = mkUifU16; difd = mkDifD16;
3746 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3747 case Iop_Or8:
3748 uifu = mkUifU8; difd = mkDifD8;
3749 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3750
3751 do_And_Or:
3752 return
3753 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003754 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003755 and_or_ty,
3756 difd(mce, uifu(mce, vatom1, vatom2),
3757 difd(mce, improve(mce, atom1, vatom1),
3758 improve(mce, atom2, vatom2) ) ) );
3759
3760 case Iop_Xor8:
3761 return mkUifU8(mce, vatom1, vatom2);
3762 case Iop_Xor16:
3763 return mkUifU16(mce, vatom1, vatom2);
3764 case Iop_Xor32:
3765 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003766 case Iop_Xor64:
3767 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003768 case Iop_XorV128:
3769 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003770 case Iop_XorV256:
3771 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003772
sewardja2f30952013-03-27 11:40:02 +00003773 /* V256-bit SIMD */
3774
3775 case Iop_ShrN16x16:
3776 case Iop_ShrN32x8:
3777 case Iop_ShrN64x4:
3778 case Iop_SarN16x16:
3779 case Iop_SarN32x8:
3780 case Iop_ShlN16x16:
3781 case Iop_ShlN32x8:
3782 case Iop_ShlN64x4:
3783 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3784 this is wrong now, scalar shifts are done properly lazily.
3785 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003786 complainIfUndefined(mce, atom2, NULL);
sewardja2f30952013-03-27 11:40:02 +00003787 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
3788
3789 case Iop_QSub8Ux32:
3790 case Iop_QSub8Sx32:
3791 case Iop_Sub8x32:
3792 case Iop_Min8Ux32:
3793 case Iop_Min8Sx32:
3794 case Iop_Max8Ux32:
3795 case Iop_Max8Sx32:
3796 case Iop_CmpGT8Sx32:
3797 case Iop_CmpEQ8x32:
3798 case Iop_Avg8Ux32:
3799 case Iop_QAdd8Ux32:
3800 case Iop_QAdd8Sx32:
3801 case Iop_Add8x32:
3802 return binary8Ix32(mce, vatom1, vatom2);
3803
3804 case Iop_QSub16Ux16:
3805 case Iop_QSub16Sx16:
3806 case Iop_Sub16x16:
3807 case Iop_Mul16x16:
3808 case Iop_MulHi16Sx16:
3809 case Iop_MulHi16Ux16:
3810 case Iop_Min16Sx16:
3811 case Iop_Min16Ux16:
3812 case Iop_Max16Sx16:
3813 case Iop_Max16Ux16:
3814 case Iop_CmpGT16Sx16:
3815 case Iop_CmpEQ16x16:
3816 case Iop_Avg16Ux16:
3817 case Iop_QAdd16Ux16:
3818 case Iop_QAdd16Sx16:
3819 case Iop_Add16x16:
3820 return binary16Ix16(mce, vatom1, vatom2);
3821
3822 case Iop_Sub32x8:
3823 case Iop_CmpGT32Sx8:
3824 case Iop_CmpEQ32x8:
3825 case Iop_Add32x8:
3826 case Iop_Max32Ux8:
3827 case Iop_Max32Sx8:
3828 case Iop_Min32Ux8:
3829 case Iop_Min32Sx8:
3830 case Iop_Mul32x8:
3831 return binary32Ix8(mce, vatom1, vatom2);
3832
3833 case Iop_Sub64x4:
3834 case Iop_Add64x4:
3835 case Iop_CmpEQ64x4:
3836 case Iop_CmpGT64Sx4:
3837 return binary64Ix4(mce, vatom1, vatom2);
3838
3839 /* Perm32x8: rearrange values in left arg using steering values
3840 from right arg. So rearrange the vbits in the same way but
3841 pessimise wrt steering values. */
3842 case Iop_Perm32x8:
3843 return mkUifUV256(
3844 mce,
3845 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
3846 mkPCast32x8(mce, vatom2)
3847 );
3848
njn25e49d8e72002-09-23 09:36:25 +00003849 default:
sewardj95448072004-11-22 20:19:51 +00003850 ppIROp(op);
3851 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003852 }
njn25e49d8e72002-09-23 09:36:25 +00003853}
3854
njn25e49d8e72002-09-23 09:36:25 +00003855
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* Compute the shadow (V-bit) expression for the unary operation
      |op| applied to |atom|.  The general schemes used below are:
        - unary*Fx*      : lane-wise pessimising cast for FP SIMD ops
        - mkPCastTo      : pessimise the whole value to a given int type
        - mkPCast*x*     : lane-wise pessimising cast for int SIMD ops
        - assignNew(unop): apply the same op to the V bits, for ops that
                           merely move/duplicate/narrow bits
        - return vatom   : ops (Not*, Reinterp*) that leave definedness
                           of each bit position unchanged.

      For the widening operations {8,16,32}{U,S}to{16,32,64}, the
      selection of shadow operation implicitly duplicates the logic in
      do_shadow_LoadG and should be kept in sync (in the very unlikely
      event that the interpretation of such widening ops changes in
      future).  See comment in do_shadow_LoadG. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx8:
      case Iop_RSqrt32Fx8:
      case Iop_Recip32Fx8:
         return unary32Fx8(mce, vatom);

      case Iop_Sqrt64Fx4:
         return unary64Fx4(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Data-movement ops producing V128: apply the identical op to
         the V bits. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* F128/D128 are shadowed as I128, so half-selection on the
         shadow is done with the integer 128-bit half-select ops. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Conversions into 128-bit FP/decimal: pessimise to I128. */
      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
      case Iop_I32UtoD128: /* unsigned I32 -> D128 */
      case Iop_I64UtoD128: /* unsigned I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* 64-bit-result ops: pessimise to I64. */
      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_I32StoD64:
      case Iop_I32UtoD64:
      case Iop_ExtractExpD64:    /* D64  -> I64 */
      case Iop_ExtractExpD128:   /* D128 -> I64 */
      case Iop_ExtractSigD64:    /* D64  -> I64 */
      case Iop_ExtractSigD128:   /* D128 -> I64 */
      case Iop_DPBtoBCD:
      case Iop_BCDtoDPB:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* 32-bit-result ops: pessimise to I32. */
      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Count-trailing-zeroes gets a precise (expensive) treatment. */
      case Iop_Ctz32:
      case Iop_Ctz64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      /* Widenings/extractions to I64: same op on the V bits. */
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Widenings/extractions to I32: same op on the V bits. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise-NOTs leave the definedness of
         each bit position unchanged, so just pass the V bits through. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:
      case Iop_NotV256:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Lane-wise pessimising casts for integer SIMD ops. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise add-long: pessimise the input lanes, apply the op,
         then pessimise again at the (wider) output lane size. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      case Iop_I64UtoF32:  /* deliberately not handled: falls through
                              to the panic below */
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
4168
4169
sewardjb9e6d242013-05-11 13:42:08 +00004170/* Worker function -- do not call directly. See comments on
4171 expr2vbits_Load for the meaning of |guard|.
4172
4173 Generates IR to (1) perform a definedness test of |addr|, (2)
4174 perform a validity test of |addr|, and (3) return the Vbits for the
4175 location indicated by |addr|. All of this only happens when
4176 |guard| is NULL or |guard| evaluates to True at run time.
4177
4178 If |guard| evaluates to False at run time, the returned value is
4179 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4180 performed.
4181
4182 The definedness of |guard| itself is not checked. That is assumed
4183 to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   /* See the block comment immediately above for the contract.  In
      brief: check the definedness and validity of |addr|, then emit a
      (possibly guarded) dirty call to the relevant MC_(helperc_LOADV*)
      helper and return the V bits it fetches from shadow memory. */
   void*    helper;           /* shadow-load helper to call */
   const HChar* hname;        /* its name, for the IRDirty */
   IRDirty* di;
   IRTemp   datavbits;        /* receives the loaded V bits */
   IRAtom*  addrAct;          /* addr + bias, the actual address */

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory.  Helper selection depends on
      both the load width and the endianness (except for the 8-bit
      case, where endianness is irrelevant). */
   ty = shadowTypeV(ty);

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64le);
                       hname = "MC_(helperc_LOADV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32le);
                       hname = "MC_(helperc_LOADV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16le);
                       hname = "MC_(helperc_LOADV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64be);
                       hname = "MC_(helperc_LOADV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32be);
                       hname = "MC_(helperc_LOADV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16be);
                       hname = "MC_(helperc_LOADV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   /* Generate the actual address into addrAct.  Width of the add
      matches the host word type. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newTemp(mce, ty, VSh);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/,
                           hname, VG_(fnptr_to_fnentry)( helper ),
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be all-ones
         (all-undefined), so it'd be obvious if it got used
         inadvertantly.  We can get by with the IR-mandated default
         value (0b01 repeating, 0x55 etc) as that'll still look pretty
         undefined if it ever leaks out. */
   }
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
4275
4276
sewardjcafe5052013-01-17 14:24:35 +00004277/* Generate IR to do a shadow load. The helper is expected to check
4278 the validity of the address and return the V bits for that address.
4279 This can optionally be controlled by a guard, which is assumed to
4280 be True if NULL. In the case where the guard is False at runtime,
sewardjb9e6d242013-05-11 13:42:08 +00004281 the helper will return the didn't-do-the-call value of 0x55..55.
4282 Since that means "completely undefined result", the caller of
sewardjcafe5052013-01-17 14:24:35 +00004283 this function will need to fix up the result somehow in that
4284 case.
sewardjb9e6d242013-05-11 13:42:08 +00004285
4286 Caller of this function is also expected to have checked the
4287 definedness of |guard| before this point.
sewardjcafe5052013-01-17 14:24:35 +00004288*/
sewardj95448072004-11-22 20:19:51 +00004289static
sewardj2e595852005-06-30 23:33:37 +00004290IRAtom* expr2vbits_Load ( MCEnv* mce,
4291 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004292 IRAtom* addr, UInt bias,
4293 IRAtom* guard )
sewardj170ee212004-12-10 18:57:51 +00004294{
sewardj2e595852005-06-30 23:33:37 +00004295 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00004296 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00004297 case Ity_I8:
4298 case Ity_I16:
4299 case Ity_I32:
4300 case Ity_I64:
sewardjcafe5052013-01-17 14:24:35 +00004301 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
sewardj45fa9f42012-05-21 10:18:10 +00004302 case Ity_V128: {
4303 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00004304 if (end == Iend_LE) {
sewardjcafe5052013-01-17 14:24:35 +00004305 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4306 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
sewardj2e595852005-06-30 23:33:37 +00004307 } else {
sewardjcafe5052013-01-17 14:24:35 +00004308 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4309 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
sewardj2e595852005-06-30 23:33:37 +00004310 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004311 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00004312 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00004313 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj45fa9f42012-05-21 10:18:10 +00004314 }
4315 case Ity_V256: {
4316 /* V256-bit case -- phrased in terms of 64 bit units (Qs),
4317 with Q3 being the most significant lane. */
4318 if (end == Iend_BE) goto unhandled;
sewardjcafe5052013-01-17 14:24:35 +00004319 IRAtom* v64Q0
4320 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4321 IRAtom* v64Q1
4322 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
4323 IRAtom* v64Q2
4324 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16, guard);
4325 IRAtom* v64Q3
4326 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24, guard);
sewardj45fa9f42012-05-21 10:18:10 +00004327 return assignNew( 'V', mce,
4328 Ity_V256,
4329 IRExpr_Qop(Iop_64x4toV256,
4330 v64Q3, v64Q2, v64Q1, v64Q0));
4331 }
4332 unhandled:
sewardj170ee212004-12-10 18:57:51 +00004333 default:
sewardj2e595852005-06-30 23:33:37 +00004334 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00004335 }
4336}
4337
4338
sewardjcafe5052013-01-17 14:24:35 +00004339/* The most general handler for guarded loads. Assumes the
sewardjb9e6d242013-05-11 13:42:08 +00004340 definedness of GUARD has already been checked by the caller. A
4341 GUARD of NULL is assumed to mean "always True". Generates code to
4342 check the definedness and validity of ADDR.
sewardjcafe5052013-01-17 14:24:35 +00004343
4344 Generate IR to do a shadow load from ADDR and return the V bits.
4345 The loaded type is TY. The loaded data is then (shadow) widened by
4346 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4347 evaluates to False at run time then the returned Vbits are simply
4348 VALT instead. Note therefore that the argument type of VWIDEN must
4349 be TY and the result type of VWIDEN must equal the type of VALT.
4350*/
florian434ffae2012-07-19 17:23:42 +00004351static
sewardjcafe5052013-01-17 14:24:35 +00004352IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4353 IREndness end, IRType ty,
4354 IRAtom* addr, UInt bias,
4355 IRAtom* guard,
4356 IROp vwiden, IRAtom* valt )
florian434ffae2012-07-19 17:23:42 +00004357{
sewardjcafe5052013-01-17 14:24:35 +00004358 /* Sanity check the conversion operation, and also set TYWIDE. */
4359 IRType tyWide = Ity_INVALID;
4360 switch (vwiden) {
4361 case Iop_INVALID:
4362 tyWide = ty;
4363 break;
4364 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4365 tyWide = Ity_I32;
4366 break;
4367 default:
4368 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
florian434ffae2012-07-19 17:23:42 +00004369 }
4370
sewardjcafe5052013-01-17 14:24:35 +00004371 /* If the guard evaluates to True, this will hold the loaded V bits
4372 at TY. If the guard evaluates to False, this will be all
4373 ones, meaning "all undefined", in which case we will have to
florian5686b2d2013-01-29 03:57:40 +00004374 replace it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00004375 IRAtom* iftrue1
4376 = assignNew('V', mce, ty,
4377 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4378 /* Now (shadow-) widen the loaded V bits to the desired width. In
4379 the guard-is-False case, the allowable widening operators will
4380 in the worst case (unsigned widening) at least leave the
4381 pre-widened part as being marked all-undefined, and in the best
4382 case (signed widening) mark the whole widened result as
4383 undefined. Anyway, it doesn't matter really, since in this case
florian5686b2d2013-01-29 03:57:40 +00004384 we will replace said value with the default value |valt| using an
4385 ITE. */
sewardjcafe5052013-01-17 14:24:35 +00004386 IRAtom* iftrue2
4387 = vwiden == Iop_INVALID
4388 ? iftrue1
4389 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4390 /* These are the V bits we will return if the load doesn't take
4391 place. */
4392 IRAtom* iffalse
4393 = valt;
florian5686b2d2013-01-29 03:57:40 +00004394 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00004395 something that iropt knows how to fold out later. */
4396 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00004397 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00004398 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00004399 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00004400}
4401
4402
4403/* A simpler handler for guarded loads, in which there is no
4404 conversion operation, and the default V bit return (when the guard
4405 evaluates to False at runtime) is "all defined". If there is no
4406 guard expression or the guard is always TRUE this function behaves
sewardjb9e6d242013-05-11 13:42:08 +00004407 like expr2vbits_Load. It is assumed that definedness of GUARD has
4408 already been checked at the call site. */
sewardjcafe5052013-01-17 14:24:35 +00004409static
4410IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4411 IREndness end, IRType ty,
4412 IRAtom* addr, UInt bias,
4413 IRAtom *guard )
4414{
4415 return expr2vbits_Load_guarded_General(
4416 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4417 );
florian434ffae2012-07-19 17:23:42 +00004418}
4419
4420
sewardj170ee212004-12-10 18:57:51 +00004421static
florian5686b2d2013-01-29 03:57:40 +00004422IRAtom* expr2vbits_ITE ( MCEnv* mce,
4423 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
sewardj95448072004-11-22 20:19:51 +00004424{
florian5686b2d2013-01-29 03:57:40 +00004425 IRAtom *vbitsC, *vbits0, *vbits1;
sewardj95448072004-11-22 20:19:51 +00004426 IRType ty;
sewardj07bfda22013-01-29 21:11:55 +00004427 /* Given ITE(cond, iftrue, iffalse), generate
4428 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
sewardj95448072004-11-22 20:19:51 +00004429 That is, steer the V bits like the originals, but trash the
4430 result if the steering value is undefined. This gives
4431 lazy propagation. */
4432 tl_assert(isOriginalAtom(mce, cond));
florian5686b2d2013-01-29 03:57:40 +00004433 tl_assert(isOriginalAtom(mce, iftrue));
4434 tl_assert(isOriginalAtom(mce, iffalse));
sewardj95448072004-11-22 20:19:51 +00004435
4436 vbitsC = expr2vbits(mce, cond);
florian5686b2d2013-01-29 03:57:40 +00004437 vbits1 = expr2vbits(mce, iftrue);
sewardj07bfda22013-01-29 21:11:55 +00004438 vbits0 = expr2vbits(mce, iffalse);
sewardj1c0ce7a2009-07-01 08:10:49 +00004439 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004440
4441 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004442 mkUifU(mce, ty, assignNew('V', mce, ty,
florian5686b2d2013-01-29 03:57:40 +00004443 IRExpr_ITE(cond, vbits1, vbits0)),
sewardj95448072004-11-22 20:19:51 +00004444 mkPCastTo(mce, ty, vbitsC) );
4445}
4446
4447/* --------- This is the main expression-handling function. --------- */
4448
4449static
4450IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4451{
4452 switch (e->tag) {
4453
4454 case Iex_Get:
4455 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4456
4457 case Iex_GetI:
4458 return shadow_GETI( mce, e->Iex.GetI.descr,
4459 e->Iex.GetI.ix, e->Iex.GetI.bias );
4460
sewardj0b9d74a2006-12-24 02:24:11 +00004461 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004462 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004463
4464 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004465 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004466
sewardje91cea72006-02-08 19:32:02 +00004467 case Iex_Qop:
4468 return expr2vbits_Qop(
4469 mce,
floriane2ab2972012-06-01 20:43:03 +00004470 e->Iex.Qop.details->op,
4471 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4472 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004473 );
4474
sewardjed69fdb2006-02-03 16:12:27 +00004475 case Iex_Triop:
4476 return expr2vbits_Triop(
4477 mce,
florian26441742012-06-02 20:30:41 +00004478 e->Iex.Triop.details->op,
4479 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4480 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004481 );
4482
sewardj95448072004-11-22 20:19:51 +00004483 case Iex_Binop:
4484 return expr2vbits_Binop(
4485 mce,
4486 e->Iex.Binop.op,
4487 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4488 );
4489
4490 case Iex_Unop:
4491 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4492
sewardj2e595852005-06-30 23:33:37 +00004493 case Iex_Load:
4494 return expr2vbits_Load( mce, e->Iex.Load.end,
4495 e->Iex.Load.ty,
sewardjcafe5052013-01-17 14:24:35 +00004496 e->Iex.Load.addr, 0/*addr bias*/,
4497 NULL/* guard == "always True"*/ );
sewardj95448072004-11-22 20:19:51 +00004498
4499 case Iex_CCall:
4500 return mkLazyN( mce, e->Iex.CCall.args,
4501 e->Iex.CCall.retty,
4502 e->Iex.CCall.cee );
4503
florian5686b2d2013-01-29 03:57:40 +00004504 case Iex_ITE:
4505 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
sewardj07bfda22013-01-29 21:11:55 +00004506 e->Iex.ITE.iffalse);
njn25e49d8e72002-09-23 09:36:25 +00004507
4508 default:
sewardj95448072004-11-22 20:19:51 +00004509 VG_(printf)("\n");
4510 ppIRExpr(e);
4511 VG_(printf)("\n");
4512 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004513 }
njn25e49d8e72002-09-23 09:36:25 +00004514}
4515
4516/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004517/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004518/*------------------------------------------------------------*/
4519
sewardj95448072004-11-22 20:19:51 +00004520/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004521
4522static
sewardj95448072004-11-22 20:19:51 +00004523IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004524{
sewardj7cf97ee2004-11-28 14:25:01 +00004525 IRType ty, tyH;
4526
sewardj95448072004-11-22 20:19:51 +00004527 /* vatom is vbits-value and as such can only have a shadow type. */
4528 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004529
sewardj1c0ce7a2009-07-01 08:10:49 +00004530 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004531 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004532
sewardj95448072004-11-22 20:19:51 +00004533 if (tyH == Ity_I32) {
4534 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004535 case Ity_I32:
4536 return vatom;
4537 case Ity_I16:
4538 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4539 case Ity_I8:
4540 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4541 default:
4542 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004543 }
sewardj6cf40ff2005-04-20 22:31:26 +00004544 } else
4545 if (tyH == Ity_I64) {
4546 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004547 case Ity_I32:
4548 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4549 case Ity_I16:
4550 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4551 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4552 case Ity_I8:
4553 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4554 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4555 default:
4556 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004557 }
sewardj95448072004-11-22 20:19:51 +00004558 } else {
4559 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004560 }
sewardj95448072004-11-22 20:19:51 +00004561 unhandled:
4562 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4563 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004564}
4565
njn25e49d8e72002-09-23 09:36:25 +00004566
/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary.

   |bias| is a byte offset added to |addr| to form the actual store
   address.  It must be zero when |data| (an original atom) is
   supplied, since in that case the V bits are computed here.
*/
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of |data| (original) and |vdata| (shadow) must be
      supplied; if given the original, compute its V bits here. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   /* |ty| is the type of the V bits being stored. */
   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory.  Vector types fall through to the
      64-bit helper, which gets invoked once per 64-bit lane. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      /* For each lane: compute its address, extract its 64 V bits,
         and build a dirty call to the 64-bit store helper. */
      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      /* All four lane stores are gated on the same guard. */
      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case: split into two 64-bit helper calls. */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      /* Both half-stores are gated on the same guard. */
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to the host word size so the
            helper receives it as a single register-sized argument. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00004827
njn25e49d8e72002-09-23 09:36:25 +00004828
sewardj95448072004-11-22 20:19:51 +00004829/* Do lazy pessimistic propagation through a dirty helper call, by
4830 looking at the annotations on it. This is the most complex part of
4831 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004832
sewardj95448072004-11-22 20:19:51 +00004833static IRType szToITy ( Int n )
4834{
4835 switch (n) {
4836 case 1: return Ity_I8;
4837 case 2: return Ity_I16;
4838 case 4: return Ity_I32;
4839 case 8: return Ity_I64;
4840 default: VG_(tool_panic)("szToITy(memcheck)");
4841 }
4842}
njn25e49d8e72002-09-23 09:36:25 +00004843
/* Instrument an IRDirty helper call.  All inputs that the helper is
   declared to read (unmasked args, guest state sections, memory) are
   pessimistically summarised, via PCast, into a single 32-bit
   V-value |curr|; that summary is then PCast-distributed to every
   declared output (the return temporary, written guest state, and
   written memory). */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                                NULL, /* original atom */
                                mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
5060
sewardj1c0ce7a2009-07-01 08:10:49 +00005061
sewardj826ec492005-05-12 18:05:00 +00005062/* We have an ABI hint telling us that [base .. base+len-1] is to
5063 become undefined ("writable"). Generate code to call a helper to
5064 notify the A/V bit machinery of this fact.
5065
5066 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00005067 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5068 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00005069*/
5070static
sewardj7cf4e6b2008-05-01 20:24:26 +00005071void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00005072{
5073 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00005074 /* Minor optimisation: if not doing origin tracking, ignore the
5075 supplied nia and pass zero instead. This is on the basis that
5076 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5077 almost always generate a shorter instruction to put zero into a
5078 register than any other value. */
5079 if (MC_(clo_mc_level) < 3)
5080 nia = mkIRExpr_HWord(0);
5081
sewardj826ec492005-05-12 18:05:00 +00005082 di = unsafeIRDirty_0_N(
5083 0/*regparms*/,
5084 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00005085 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00005086 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00005087 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005088 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00005089}
5090
njn25e49d8e72002-09-23 09:36:25 +00005091
sewardj1c0ce7a2009-07-01 08:10:49 +00005092/* ------ Dealing with IRCAS (big and complex) ------ */
5093
5094/* FWDS */
5095static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5096 IRAtom* baseaddr, Int offset );
5097static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5098static void gen_store_b ( MCEnv* mce, Int szB,
5099 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5100 IRAtom* guard );
5101
5102static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5103static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5104
5105
5106/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5107 IRExpr.Consts, else this asserts. If they are both Consts, it
5108 doesn't do anything. So that just leaves the RdTmp case.
5109
5110 In which case: this assigns the shadow value SHADOW to the IR
5111 shadow temporary associated with ORIG. That is, ORIG, being an
5112 original temporary, will have a shadow temporary associated with
5113 it. However, in the case envisaged here, there will so far have
5114 been no IR emitted to actually write a shadow value into that
5115 temporary. What this routine does is to (emit IR to) copy the
5116 value in SHADOW into said temporary, so that after this call,
5117 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5118 value in SHADOW.
5119
5120 Point is to allow callers to compute "by hand" a shadow value for
5121 ORIG, and force it to be associated with ORIG.
5122
5123 How do we know that that shadow associated with ORIG has not so far
5124 been assigned to? Well, we don't per se know that, but supposing
5125 it had. Then this routine would create a second assignment to it,
5126 and later the IR sanity checker would barf. But that never
5127 happens. QED.
5128*/
5129static void bind_shadow_tmp_to_orig ( UChar how,
5130 MCEnv* mce,
5131 IRAtom* orig, IRAtom* shadow )
5132{
5133 tl_assert(isOriginalAtom(mce, orig));
5134 tl_assert(isShadowAtom(mce, shadow));
5135 switch (orig->tag) {
5136 case Iex_Const:
5137 tl_assert(shadow->tag == Iex_Const);
5138 break;
5139 case Iex_RdTmp:
5140 tl_assert(shadow->tag == Iex_RdTmp);
5141 if (how == 'V') {
5142 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5143 shadow);
5144 } else {
5145 tl_assert(how == 'B');
5146 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5147 shadow);
5148 }
5149 break;
5150 default:
5151 tl_assert(0);
5152 }
5153}
5154
5155
/* Instrument an IRCAS statement.  All the work is done in
   do_shadow_CAS_single / do_shadow_CAS_double; this routine only
   documents the common scheme and dispatches on single vs double. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
         store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
           old = * %reg
           new = old + 1
           atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* A double CAS has a valid oldHi temporary; a single CAS does not. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
5288
5289
/* Instrument a single-word CAS, following the 7-step scheme described
   in do_shadow_CAS.  The ordering of the emitted IR is significant:
   the shadow loads (step 4) must precede the CAS statement itself
   (step 5), and the guarded shadow stores (step 7) must follow it,
   since their guard is derived from the CAS's output temporary. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo = NULL, *boldLo = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp opCasCmpEQ;
   Int elemSzB;
   IRType elemTy;
   Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      /* B (origin) shadows are only maintained at --track-origins=yes. */
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make the CAS's oldLo shadow temp pick up the just-loaded value. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5378
5379
/* Instrument a double-word CAS, following the 7-step scheme described
   in do_shadow_CAS.  Identical to do_shadow_CAS_single except that
   every value has a Hi and a Lo half, the two halves' shadow-memory
   offsets depend on endianness, and the success condition is computed
   as ((oldHi^expdHi) | (oldLo^expdLo)) == 0 so that a single
   CasCmpEQ covers both halves.  As with the single case, the emitted
   IR order (shadow loads, then the CAS, then guarded shadow stores)
   is significant. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi = NULL, *boldHi = NULL;
   IRAtom *voldLo = NULL, *boldLo = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp opCasCmpEQ, opOr, opXor;
   Int elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      /* B (origin) shadows are only maintained at --track-origins=yes. */
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   /* The Lo half lives at the lower address on little-endian targets
      and at the higher address on big-endian targets. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make the CAS's oldHi/oldLo shadow temps pick up the loaded values. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5540
5541
sewardjdb5907d2009-11-26 17:20:21 +00005542/* ------ Dealing with LL/SC (not difficult) ------ */
5543
/* Instrument a Load-Linked / Store-Conditional (Ist_LLSC) statement.
   stStoredata == NULL means Load-Linked; non-NULL means
   Store-Conditional.  stResult receives the loaded value (LL) or the
   I1 success flag (SC); stEnd/stAddr give the access endianness and
   address. */
static void do_shadow_LLSC ( MCEnv* mce,
                             IREndness stEnd,
                             IRTemp stResult,
                             IRExpr* stAddr,
                             IRExpr* stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
                 NULL/*always happens*/) );
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressibility, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
5603
5604
sewardjcafe5052013-01-17 14:24:35 +00005605/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
5606
/* Instrument a guarded store (Ist_StoreG).  The guard's definedness
   is checked unconditionally, before the (guarded) shadow store is
   emitted, since the guard is evaluated at run-time whether or not
   the store happens. */
static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   complainIfUndefined(mce, sg->guard, NULL);
   /* do_shadow_Store will generate code to check the definedness and
      validity of sg->addr, in the case where sg->guard evaluates to
      True at run-time. */
   do_shadow_Store( mce, sg->end,
                    sg->addr, 0/* addr bias */,
                    sg->data,
                    NULL /* shadow data */,
                    sg->guard );
}
5619
/* Instrument a guarded load (Ist_LoadG).  Checks the guard's
   definedness, then computes V bits for lg->dst: the (possibly
   widened) loaded shadow data if the guard is true at run-time, else
   the shadow of the alternative value lg->alt. */
static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   complainIfUndefined(mce, lg->guard, NULL);
   /* expr2vbits_Load_guarded_General will generate code to check the
      definedness and validity of lg->addr, in the case where
      lg->guard evaluates to True at run-time. */

   /* Look at the LoadG's built-in conversion operation, to determine
      the source (actual loaded data) type, and the equivalent IROp.
      NOTE that implicitly we are taking a widening operation to be
      applied to original atoms and producing one that applies to V
      bits.  Since signed and unsigned widening are self-shadowing,
      this is a straight copy of the op (modulo swapping from the
      IRLoadGOp form to the IROp form).  Note also therefore that this
      implicitly duplicates the logic to do with said widening ops in
      expr2vbits_Unop.  See comment at the start of expr2vbits_Unop. */
   IROp vwiden = Iop_INVALID;
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
      case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
      case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
      case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
      case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
      default: VG_(tool_panic)("do_shadow_LoadG");
   }

   IRAtom* vbits_alt
      = expr2vbits( mce, lg->alt );
   IRAtom* vbits_final
      = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
                                        lg->addr, 0/*addr bias*/,
                                        lg->guard, vwiden, vbits_alt );
   /* And finally, bind the V bits to the destination temporary. */
   assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
}
5656
5657
sewardj95448072004-11-22 20:19:51 +00005658/*------------------------------------------------------------*/
5659/*--- Memcheck main ---*/
5660/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005661
sewardj7cf4e6b2008-05-01 20:24:26 +00005662static void schemeS ( MCEnv* mce, IRStmt* st );
5663
sewardj95448072004-11-22 20:19:51 +00005664static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005665{
sewardj95448072004-11-22 20:19:51 +00005666 ULong n = 0;
5667 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005668 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005669 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005670 return False;
5671 tl_assert(at->tag == Iex_Const);
5672 con = at->Iex.Const.con;
5673 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005674 case Ico_U1: return False;
5675 case Ico_U8: n = (ULong)con->Ico.U8; break;
5676 case Ico_U16: n = (ULong)con->Ico.U16; break;
5677 case Ico_U32: n = (ULong)con->Ico.U32; break;
5678 case Ico_U64: n = (ULong)con->Ico.U64; break;
5679 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005680 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005681 case Ico_F64i: return False;
5682 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00005683 default: ppIRExpr(at); tl_assert(0);
5684 }
5685 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005686 return (/*32*/ n == 0xFEFEFEFFULL
5687 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005688 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00005689 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005690 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005691 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005692 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005693 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005694 );
sewardj95448072004-11-22 20:19:51 +00005695}
njn25e49d8e72002-09-23 09:36:25 +00005696
/* Returns True iff the (flat) statement ST mentions, in any of its
   atom positions, a constant that isBogusAtom classifies as bogus.
   Used to decide whether expensive definedness-propagation variants
   are needed for this superblock.  Panics on statement/expression
   forms it does not know about.
   NOTE(review): the panic string says "hasBogusLiterals", not
   "checkForBogusLiterals" -- presumably a historical function name. */
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int i;
   IRExpr* e;
   IRDirty* d;
   IRCAS* cas;
   switch (st->tag) {
      case Ist_WrTmp:
         /* Inspect the atoms of the RHS expression, per expression kind. */
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.details->arg1)
                      || isBogusAtom(e->Iex.Triop.details->arg2)
                      || isBogusAtom(e->Iex.Triop.details->arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.details->arg1)
                      || isBogusAtom(e->Iex.Qop.details->arg2)
                      || isBogusAtom(e->Iex.Qop.details->arg3)
                      || isBogusAtom(e->Iex.Qop.details->arg4);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               /* args is a NULL-terminated vector. */
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.details->ix)
                || isBogusAtom(st->Ist.PutI.details->data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
                || isBogusAtom(sg->guard);
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
                || isBogusAtom(lg->guard);
      }
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base)
                || isBogusAtom(st->Ist.AbiHint.nia);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These statement forms contain no atoms at all. */
         return False;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for a single CAS. */
         cas = st->Ist.CAS.details;
         return isBogusAtom(cas->addr)
                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
                || isBogusAtom(cas->expdLo)
                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
                || isBogusAtom(cas->dataLo);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         return isBogusAtom(st->Ist.LLSC.addr)
                || (st->Ist.LLSC.storedata
                       ? isBogusAtom(st->Ist.LLSC.storedata)
                       : False);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("hasBogusLiterals");
   }
}
njn25e49d8e72002-09-23 09:36:25 +00005797
njn25e49d8e72002-09-23 09:36:25 +00005798
/* Top-level Memcheck instrumentation routine.  Builds and returns a
   new superblock (sb_out) which is a shadow-instrumented copy of
   sb_in: each original statement is preceded by statements computing
   its definedness ('V' bit) shadow state, and, when origin tracking
   is enabled (MC_(clo_mc_level) == 3), its origin-tag ('B') shadow
   state as well.  The input block sb_in is not modified. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything except the statement list, which is
      rebuilt below with instrumentation interleaved. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
      Darwin.  10.7 is mostly built with LLVM, which uses these for
      bitfield inserts, and we get a lot of false errors if the cheap
      interpretation is used, alas.  Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   /* One tmpMap entry per original temp, all with as-yet-unallocated
      shadow temps. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in sb_out,
         for the verbose dump below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_StoreG:
            do_shadow_StoreG( &mce, st->Ist.StoreG.details );
            break;

         case Ist_LoadG:
            do_shadow_LoadG( &mce, st->Ist.LoadG.details );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next, NULL );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00006106
sewardj81651dc2007-08-28 06:05:20 +00006107/*------------------------------------------------------------*/
6108/*--- Post-tree-build final tidying ---*/
6109/*------------------------------------------------------------*/
6110
6111/* This exploits the observation that Memcheck often produces
6112 repeated conditional calls of the form
6113
sewardj7cf4e6b2008-05-01 20:24:26 +00006114 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00006115
6116 with the same guard expression G guarding the same helper call.
6117 The second and subsequent calls are redundant. This usually
6118 results from instrumentation of guest code containing multiple
6119 memory references at different constant offsets from the same base
6120 register. After optimisation of the instrumentation, you get a
6121 test for the definedness of the base register for each memory
6122 reference, which is kinda pointless. MC_(final_tidy) therefore
6123 looks for such repeated calls and removes all but the first. */
6124
/* A struct for recording which (helper, guard) pairs we have already
   seen.  'entry' is the helper's entry-point address, used as an
   identity key; 'guard' is the IR guard expression of the call. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
6130
6131/* Return True if e1 and e2 definitely denote the same value (used to
6132 compare guards). Return False if unknown; False is the safe
6133 answer. Since guest registers and guest memory do not have the
6134 SSA property we must return False if any Gets or Loads appear in
6135 the expression. */
6136
6137static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6138{
6139 if (e1->tag != e2->tag)
6140 return False;
6141 switch (e1->tag) {
6142 case Iex_Const:
6143 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6144 case Iex_Binop:
6145 return e1->Iex.Binop.op == e2->Iex.Binop.op
6146 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6147 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6148 case Iex_Unop:
6149 return e1->Iex.Unop.op == e2->Iex.Unop.op
6150 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6151 case Iex_RdTmp:
6152 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
florian5686b2d2013-01-29 03:57:40 +00006153 case Iex_ITE:
6154 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6155 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6156 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
sewardj81651dc2007-08-28 06:05:20 +00006157 case Iex_Qop:
6158 case Iex_Triop:
6159 case Iex_CCall:
6160 /* be lazy. Could define equality for these, but they never
6161 appear to be used. */
6162 return False;
6163 case Iex_Get:
6164 case Iex_GetI:
6165 case Iex_Load:
6166 /* be conservative - these may not give the same value each
6167 time */
6168 return False;
6169 case Iex_Binder:
6170 /* should never see this */
6171 /* fallthrough */
6172 default:
6173 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6174 ppIRExpr(e1);
6175 VG_(tool_panic)("memcheck:sameIRValue");
6176 return False;
6177 }
6178}
6179
6180/* See if 'pairs' already has an entry for (entry, guard). Return
6181 True if so. If not, add an entry. */
6182
6183static
6184Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6185{
6186 Pair p;
6187 Pair* pp;
6188 Int i, n = VG_(sizeXA)( pairs );
6189 for (i = 0; i < n; i++) {
6190 pp = VG_(indexXA)( pairs, i );
6191 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6192 return True;
6193 }
6194 p.guard = guard;
6195 p.entry = entry;
6196 VG_(addToXA)( pairs, &p );
6197 return False;
6198}
6199
florian11f3cc82012-10-21 02:19:35 +00006200static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00006201{
6202 return
sewardj7cf4e6b2008-05-01 20:24:26 +00006203 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6204 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6205 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6206 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6207 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6208 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6209 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6210 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00006211}
6212
/* Post-instrumentation pass: remove redundant guarded calls to the
   value-check-fail helpers.  See the block comment above: repeated
   Dirty calls to the same helper under structurally-identical guards
   are collapsed to the first occurrence; later duplicates are
   replaced by no-ops.  Modifies sb_in in place and returns it. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   /* Set of (helper entry, guard) pairs seen so far in this block. */
   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                                 VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      tl_assert(guard);
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
6252
6253
sewardj7cf4e6b2008-05-01 20:24:26 +00006254/*------------------------------------------------------------*/
6255/*--- Origin tracking stuff ---*/
6256/*------------------------------------------------------------*/
6257
/* Almost identical to findShadowTmpV.  Returns the origin ('B')
   shadow temp for original temp 'orig', lazily allocating a fresh
   Ity_I32 shadow on first use. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
        = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
6278
/* Combine two origin tags: emit IR computing the unsigned 32-bit
   maximum (Iop_Max32U) of b1 and b2 into a fresh 'B' shadow temp. */
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}
6283
sewardjcafe5052013-01-17 14:24:35 +00006284
/* Make a guarded origin load, with no special handling in the
   didn't-happen case.  A GUARD of NULL is assumed to mean "always
   True".

   Generate IR to do a shadow origins load from BASEADDR+OFFSET and
   return the otag.  The loaded size is SZB.  If GUARD evaluates to
   False at run time then the returned otag is zero.
*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*    hFun;
   const HChar* hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   /* Fold the byte offset into the effective address, if any. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* Helper returns a host word; narrowed to 32 bits below if the
      host is 64-bit. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertently.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00006360
sewardjcafe5052013-01-17 14:24:35 +00006361
/* Generate IR to do a shadow origins load from BASEADDR+OFFSET.  The
   loaded size is SZB.  The load is regarded as unconditional (always
   happens).  Simply delegates to gen_guarded_load_b with no guard. */
static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
                            Int offset )
{
   return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
}
6371
sewardjcafe5052013-01-17 14:24:35 +00006372
/* The most general handler for guarded origin loads.  A GUARD of NULL
   is assumed to mean "always True".

   Generate IR to do a shadow origin load from ADDR+BIAS and return
   the B bits.  The loaded type is TY.  If GUARD evaluates to False at
   run time then the returned B bits are simply BALT instead.
*/
static
IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
                                        IRType ty,
                                        IRAtom* addr, UInt bias,
                                        IRAtom* guard, IRAtom* balt )
{
   /* If the guard evaluates to True, this will hold the loaded
      origin.  If the guard evaluates to False, this will be zero,
      meaning "unknown origin", in which case we will have to replace
      it using an ITE below. */
   IRAtom* iftrue
      = assignNew('B', mce, Ity_I32,
                  gen_guarded_load_b(mce, sizeofIRType(ty),
                                     addr, bias, guard));
   /* These are the bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = balt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
}
6405
6406
6407/* Generate a shadow origins store. guard :: Ity_I1 controls whether
6408 the store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00006409static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00006410 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6411 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00006412{
6413 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00006414 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00006415 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00006416 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00006417 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6418 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00006419 if (guard) {
6420 tl_assert(isOriginalAtom(mce, guard));
6421 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6422 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006423 if (offset != 0) {
6424 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6425 : mkU64( (Long)(Int)offset );
6426 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6427 }
6428 if (mce->hWordTy == Ity_I64)
6429 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6430
6431 switch (szB) {
6432 case 1: hFun = (void*)&MC_(helperc_b_store1);
6433 hName = "MC_(helperc_b_store1)";
6434 break;
6435 case 2: hFun = (void*)&MC_(helperc_b_store2);
6436 hName = "MC_(helperc_b_store2)";
6437 break;
6438 case 4: hFun = (void*)&MC_(helperc_b_store4);
6439 hName = "MC_(helperc_b_store4)";
6440 break;
6441 case 8: hFun = (void*)&MC_(helperc_b_store8);
6442 hName = "MC_(helperc_b_store8)";
6443 break;
6444 case 16: hFun = (void*)&MC_(helperc_b_store16);
6445 hName = "MC_(helperc_b_store16)";
6446 break;
sewardj45fa9f42012-05-21 10:18:10 +00006447 case 32: hFun = (void*)&MC_(helperc_b_store32);
6448 hName = "MC_(helperc_b_store32)";
6449 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00006450 default:
6451 tl_assert(0);
6452 }
6453 di = unsafeIRDirty_0_N( 2/*regparms*/,
6454 hName, VG_(fnptr_to_fnentry)( hFun ),
6455 mkIRExprVec_2( ea, dataB )
6456 );
6457 /* no need to mess with any annotations. This call accesses
6458 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00006459 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00006460 stmt( 'B', mce, IRStmt_Dirty(di) );
6461}
6462
6463static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006464 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006465 if (eTy == Ity_I64)
6466 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6467 if (eTy == Ity_I32)
6468 return e;
6469 tl_assert(0);
6470}
6471
6472static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006473 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006474 tl_assert(eTy == Ity_I32);
6475 if (dstTy == Ity_I64)
6476 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6477 tl_assert(0);
6478}
6479
sewardjdb5907d2009-11-26 17:20:21 +00006480
sewardj7cf4e6b2008-05-01 20:24:26 +00006481static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6482{
6483 tl_assert(MC_(clo_mc_level) == 3);
6484
6485 switch (e->tag) {
6486
6487 case Iex_GetI: {
6488 IRRegArray* descr_b;
6489 IRAtom *t1, *t2, *t3, *t4;
6490 IRRegArray* descr = e->Iex.GetI.descr;
6491 IRType equivIntTy
6492 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6493 /* If this array is unshadowable for whatever reason, use the
6494 usual approximation. */
6495 if (equivIntTy == Ity_INVALID)
6496 return mkU32(0);
6497 tl_assert(sizeofIRType(equivIntTy) >= 4);
6498 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6499 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6500 equivIntTy, descr->nElems );
6501 /* Do a shadow indexed get of the same size, giving t1. Take
6502 the bottom 32 bits of it, giving t2. Compute into t3 the
6503 origin for the index (almost certainly zero, but there's
6504 no harm in being completely general here, since iropt will
6505 remove any useless code), and fold it in, giving a final
6506 value t4. */
6507 t1 = assignNew( 'B', mce, equivIntTy,
6508 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6509 e->Iex.GetI.bias ));
6510 t2 = narrowTo32( mce, t1 );
6511 t3 = schemeE( mce, e->Iex.GetI.ix );
6512 t4 = gen_maxU32( mce, t2, t3 );
6513 return t4;
6514 }
6515 case Iex_CCall: {
6516 Int i;
6517 IRAtom* here;
6518 IRExpr** args = e->Iex.CCall.args;
6519 IRAtom* curr = mkU32(0);
6520 for (i = 0; args[i]; i++) {
6521 tl_assert(i < 32);
6522 tl_assert(isOriginalAtom(mce, args[i]));
6523 /* Only take notice of this arg if the callee's
6524 mc-exclusion mask does not say it is to be excluded. */
6525 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6526 /* the arg is to be excluded from definedness checking.
6527 Do nothing. */
6528 if (0) VG_(printf)("excluding %s(%d)\n",
6529 e->Iex.CCall.cee->name, i);
6530 } else {
6531 /* calculate the arg's definedness, and pessimistically
6532 merge it in. */
6533 here = schemeE( mce, args[i] );
6534 curr = gen_maxU32( mce, curr, here );
6535 }
6536 }
6537 return curr;
6538 }
6539 case Iex_Load: {
6540 Int dszB;
6541 dszB = sizeofIRType(e->Iex.Load.ty);
6542 /* assert that the B value for the address is already
6543 available (somewhere) */
6544 tl_assert(isIRAtom(e->Iex.Load.addr));
6545 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
6546 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
6547 }
florian5686b2d2013-01-29 03:57:40 +00006548 case Iex_ITE: {
6549 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
florian5686b2d2013-01-29 03:57:40 +00006550 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
sewardj07bfda22013-01-29 21:11:55 +00006551 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
sewardj7cf4e6b2008-05-01 20:24:26 +00006552 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
6553 }
6554 case Iex_Qop: {
floriane2ab2972012-06-01 20:43:03 +00006555 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
6556 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
6557 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
6558 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
sewardj7cf4e6b2008-05-01 20:24:26 +00006559 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
6560 gen_maxU32( mce, b3, b4 ) );
6561 }
6562 case Iex_Triop: {
florian26441742012-06-02 20:30:41 +00006563 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
6564 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
6565 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
sewardj7cf4e6b2008-05-01 20:24:26 +00006566 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6567 }
6568 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00006569 switch (e->Iex.Binop.op) {
6570 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
6571 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6572 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6573 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6574 /* Just say these all produce a defined result,
6575 regardless of their arguments. See
6576 COMMENT_ON_CasCmpEQ in this file. */
6577 return mkU32(0);
6578 default: {
6579 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6580 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6581 return gen_maxU32( mce, b1, b2 );
6582 }
6583 }
6584 tl_assert(0);
6585 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00006586 }
6587 case Iex_Unop: {
6588 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6589 return b1;
6590 }
6591 case Iex_Const:
6592 return mkU32(0);
6593 case Iex_RdTmp:
6594 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6595 case Iex_Get: {
6596 Int b_offset = MC_(get_otrack_shadow_offset)(
6597 e->Iex.Get.offset,
6598 sizeofIRType(e->Iex.Get.ty)
6599 );
6600 tl_assert(b_offset >= -1
6601 && b_offset <= mce->layout->total_sizeB -4);
6602 if (b_offset >= 0) {
6603 /* FIXME: this isn't an atom! */
6604 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6605 Ity_I32 );
6606 }
6607 return mkU32(0);
6608 }
6609 default:
6610 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6611 ppIRExpr(e);
6612 VG_(tool_panic)("memcheck:schemeE");
6613 }
6614}
6615
sewardjdb5907d2009-11-26 17:20:21 +00006616
sewardj7cf4e6b2008-05-01 20:24:26 +00006617static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6618{
6619 // This is a hacked version of do_shadow_Dirty
sewardj2eecb742012-06-01 16:11:41 +00006620 Int i, k, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00006621 IRAtom *here, *curr;
6622 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00006623
6624 /* First check the guard. */
6625 curr = schemeE( mce, d->guard );
6626
6627 /* Now round up all inputs and maxU32 over them. */
6628
florian434ffae2012-07-19 17:23:42 +00006629 /* Inputs: unmasked args
6630 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj7cf4e6b2008-05-01 20:24:26 +00006631 for (i = 0; d->args[i]; i++) {
6632 if (d->cee->mcx_mask & (1<<i)) {
6633 /* ignore this arg */
6634 } else {
6635 here = schemeE( mce, d->args[i] );
6636 curr = gen_maxU32( mce, curr, here );
6637 }
6638 }
6639
6640 /* Inputs: guest state that we read. */
6641 for (i = 0; i < d->nFxState; i++) {
6642 tl_assert(d->fxState[i].fx != Ifx_None);
6643 if (d->fxState[i].fx == Ifx_Write)
6644 continue;
6645
sewardj2eecb742012-06-01 16:11:41 +00006646 /* Enumerate the described state segments */
6647 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6648 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6649 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006650
sewardj2eecb742012-06-01 16:11:41 +00006651 /* Ignore any sections marked as 'always defined'. */
6652 if (isAlwaysDefd(mce, gOff, gSz)) {
6653 if (0)
6654 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6655 gOff, gSz);
6656 continue;
sewardj7cf4e6b2008-05-01 20:24:26 +00006657 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006658
sewardj2eecb742012-06-01 16:11:41 +00006659 /* This state element is read or modified. So we need to
6660 consider it. If larger than 4 bytes, deal with it in
6661 4-byte chunks. */
6662 while (True) {
6663 Int b_offset;
6664 tl_assert(gSz >= 0);
6665 if (gSz == 0) break;
6666 n = gSz <= 4 ? gSz : 4;
6667 /* update 'curr' with maxU32 of the state slice
6668 gOff .. gOff+n-1 */
6669 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6670 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006671 /* Observe the guard expression. If it is false use 0, i.e.
6672 nothing is known about the origin */
6673 IRAtom *cond, *iffalse, *iftrue;
6674
sewardjcc961652013-01-26 11:49:15 +00006675 cond = assignNew( 'B', mce, Ity_I1, d->guard);
florian434ffae2012-07-19 17:23:42 +00006676 iffalse = mkU32(0);
6677 iftrue = assignNew( 'B', mce, Ity_I32,
6678 IRExpr_Get(b_offset
6679 + 2*mce->layout->total_sizeB,
6680 Ity_I32));
6681 here = assignNew( 'B', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006682 IRExpr_ITE(cond, iftrue, iffalse));
sewardj2eecb742012-06-01 16:11:41 +00006683 curr = gen_maxU32( mce, curr, here );
6684 }
6685 gSz -= n;
6686 gOff += n;
6687 }
6688 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006689 }
6690
6691 /* Inputs: memory */
6692
6693 if (d->mFx != Ifx_None) {
6694 /* Because we may do multiple shadow loads/stores from the same
6695 base address, it's best to do a single test of its
6696 definedness right now. Post-instrumentation optimisation
6697 should remove all but this test. */
6698 tl_assert(d->mAddr);
6699 here = schemeE( mce, d->mAddr );
6700 curr = gen_maxU32( mce, curr, here );
6701 }
6702
6703 /* Deal with memory inputs (reads or modifies) */
6704 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006705 toDo = d->mSize;
6706 /* chew off 32-bit chunks. We don't care about the endianness
6707 since it's all going to be condensed down to a single bit,
6708 but nevertheless choose an endianness which is hopefully
6709 native to the platform. */
6710 while (toDo >= 4) {
florian434ffae2012-07-19 17:23:42 +00006711 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6712 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006713 curr = gen_maxU32( mce, curr, here );
6714 toDo -= 4;
6715 }
sewardj8c93fcc2008-10-30 13:08:31 +00006716 /* handle possible 16-bit excess */
6717 while (toDo >= 2) {
florian434ffae2012-07-19 17:23:42 +00006718 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6719 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006720 curr = gen_maxU32( mce, curr, here );
6721 toDo -= 2;
6722 }
floriancda994b2012-06-08 16:01:19 +00006723 /* chew off the remaining 8-bit chunk, if any */
6724 if (toDo == 1) {
florian434ffae2012-07-19 17:23:42 +00006725 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6726 d->guard );
floriancda994b2012-06-08 16:01:19 +00006727 curr = gen_maxU32( mce, curr, here );
6728 toDo -= 1;
6729 }
6730 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006731 }
6732
6733 /* Whew! So curr is a 32-bit B-value which should give an origin
6734 of some use if any of the inputs to the helper are undefined.
6735 Now we need to re-distribute the results to all destinations. */
6736
6737 /* Outputs: the destination temporary, if there is one. */
6738 if (d->tmp != IRTemp_INVALID) {
6739 dst = findShadowTmpB(mce, d->tmp);
6740 assign( 'V', mce, dst, curr );
6741 }
6742
6743 /* Outputs: guest state that we write or modify. */
6744 for (i = 0; i < d->nFxState; i++) {
6745 tl_assert(d->fxState[i].fx != Ifx_None);
6746 if (d->fxState[i].fx == Ifx_Read)
6747 continue;
6748
sewardj2eecb742012-06-01 16:11:41 +00006749 /* Enumerate the described state segments */
6750 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6751 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6752 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006753
sewardj2eecb742012-06-01 16:11:41 +00006754 /* Ignore any sections marked as 'always defined'. */
6755 if (isAlwaysDefd(mce, gOff, gSz))
6756 continue;
6757
6758 /* This state element is written or modified. So we need to
6759 consider it. If larger than 4 bytes, deal with it in
6760 4-byte chunks. */
6761 while (True) {
6762 Int b_offset;
6763 tl_assert(gSz >= 0);
6764 if (gSz == 0) break;
6765 n = gSz <= 4 ? gSz : 4;
6766 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6767 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6768 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006769
florian6c0aa2c2013-01-21 01:27:22 +00006770 /* If the guard expression evaluates to false we simply Put
6771 the value that is already stored in the guest state slot */
6772 IRAtom *cond, *iffalse;
6773
sewardjcc961652013-01-26 11:49:15 +00006774 cond = assignNew('B', mce, Ity_I1,
6775 d->guard);
florian6c0aa2c2013-01-21 01:27:22 +00006776 iffalse = assignNew('B', mce, Ity_I32,
6777 IRExpr_Get(b_offset +
6778 2*mce->layout->total_sizeB,
6779 Ity_I32));
6780 curr = assignNew('V', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006781 IRExpr_ITE(cond, curr, iffalse));
florian6c0aa2c2013-01-21 01:27:22 +00006782
sewardj2eecb742012-06-01 16:11:41 +00006783 stmt( 'B', mce, IRStmt_Put(b_offset
florian6c0aa2c2013-01-21 01:27:22 +00006784 + 2*mce->layout->total_sizeB,
sewardj2eecb742012-06-01 16:11:41 +00006785 curr ));
6786 }
6787 gSz -= n;
6788 gOff += n;
sewardj7cf4e6b2008-05-01 20:24:26 +00006789 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006790 }
6791 }
6792
6793 /* Outputs: memory that we write or modify. Same comments about
6794 endianness as above apply. */
6795 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006796 toDo = d->mSize;
6797 /* chew off 32-bit chunks */
6798 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006799 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006800 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006801 toDo -= 4;
6802 }
sewardj8c93fcc2008-10-30 13:08:31 +00006803 /* handle possible 16-bit excess */
6804 while (toDo >= 2) {
sewardjcafe5052013-01-17 14:24:35 +00006805 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6806 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006807 toDo -= 2;
6808 }
floriancda994b2012-06-08 16:01:19 +00006809 /* chew off the remaining 8-bit chunk, if any */
6810 if (toDo == 1) {
6811 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006812 d->guard );
floriancda994b2012-06-08 16:01:19 +00006813 toDo -= 1;
6814 }
6815 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006816 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006817}
6818
sewardjdb5907d2009-11-26 17:20:21 +00006819
sewardjcafe5052013-01-17 14:24:35 +00006820/* Generate IR for origin shadowing for a general guarded store. */
6821static void do_origins_Store_guarded ( MCEnv* mce,
6822 IREndness stEnd,
6823 IRExpr* stAddr,
6824 IRExpr* stData,
6825 IRExpr* guard )
sewardjdb5907d2009-11-26 17:20:21 +00006826{
6827 Int dszB;
6828 IRAtom* dataB;
6829 /* assert that the B value for the address is already available
6830 (somewhere), since the call to schemeE will want to see it.
6831 XXXX how does this actually ensure that?? */
6832 tl_assert(isIRAtom(stAddr));
6833 tl_assert(isIRAtom(stData));
6834 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6835 dataB = schemeE( mce, stData );
sewardjcafe5052013-01-17 14:24:35 +00006836 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
6837}
6838
6839
6840/* Generate IR for origin shadowing for a plain store. */
6841static void do_origins_Store_plain ( MCEnv* mce,
6842 IREndness stEnd,
6843 IRExpr* stAddr,
6844 IRExpr* stData )
6845{
6846 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
6847 NULL/*guard*/ );
6848}
6849
6850
6851/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6852
6853static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
6854{
6855 do_origins_Store_guarded( mce, sg->end, sg->addr,
6856 sg->data, sg->guard );
6857}
6858
6859static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
6860{
6861 IRType loadedTy = Ity_INVALID;
6862 switch (lg->cvt) {
6863 case ILGop_Ident32: loadedTy = Ity_I32; break;
6864 case ILGop_16Uto32: loadedTy = Ity_I16; break;
6865 case ILGop_16Sto32: loadedTy = Ity_I16; break;
6866 case ILGop_8Uto32: loadedTy = Ity_I8; break;
6867 case ILGop_8Sto32: loadedTy = Ity_I8; break;
6868 default: VG_(tool_panic)("schemeS.IRLoadG");
6869 }
6870 IRAtom* ori_alt
6871 = schemeE( mce,lg->alt );
6872 IRAtom* ori_final
6873 = expr2ori_Load_guarded_General(mce, loadedTy,
6874 lg->addr, 0/*addr bias*/,
6875 lg->guard, ori_alt );
6876 /* And finally, bind the origin to the destination temporary. */
6877 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
sewardjdb5907d2009-11-26 17:20:21 +00006878}
6879
6880
sewardj7cf4e6b2008-05-01 20:24:26 +00006881static void schemeS ( MCEnv* mce, IRStmt* st )
6882{
6883 tl_assert(MC_(clo_mc_level) == 3);
6884
6885 switch (st->tag) {
6886
6887 case Ist_AbiHint:
6888 /* The value-check instrumenter handles this - by arranging
6889 to pass the address of the next instruction to
6890 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
6891 happen for origin tracking w.r.t. AbiHints. So there is
6892 nothing to do here. */
6893 break;
6894
6895 case Ist_PutI: {
floriand39b0222012-05-31 15:48:13 +00006896 IRPutI *puti = st->Ist.PutI.details;
sewardj7cf4e6b2008-05-01 20:24:26 +00006897 IRRegArray* descr_b;
6898 IRAtom *t1, *t2, *t3, *t4;
floriand39b0222012-05-31 15:48:13 +00006899 IRRegArray* descr = puti->descr;
sewardj7cf4e6b2008-05-01 20:24:26 +00006900 IRType equivIntTy
6901 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6902 /* If this array is unshadowable for whatever reason,
6903 generate no code. */
6904 if (equivIntTy == Ity_INVALID)
6905 break;
6906 tl_assert(sizeofIRType(equivIntTy) >= 4);
6907 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6908 descr_b
6909 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6910 equivIntTy, descr->nElems );
6911 /* Compute a value to Put - the conjoinment of the origin for
6912 the data to be Put-ted (obviously) and of the index value
6913 (not so obviously). */
floriand39b0222012-05-31 15:48:13 +00006914 t1 = schemeE( mce, puti->data );
6915 t2 = schemeE( mce, puti->ix );
sewardj7cf4e6b2008-05-01 20:24:26 +00006916 t3 = gen_maxU32( mce, t1, t2 );
6917 t4 = zWidenFrom32( mce, equivIntTy, t3 );
floriand39b0222012-05-31 15:48:13 +00006918 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
6919 puti->bias, t4) ));
sewardj7cf4e6b2008-05-01 20:24:26 +00006920 break;
6921 }
sewardjdb5907d2009-11-26 17:20:21 +00006922
sewardj7cf4e6b2008-05-01 20:24:26 +00006923 case Ist_Dirty:
6924 do_origins_Dirty( mce, st->Ist.Dirty.details );
6925 break;
sewardjdb5907d2009-11-26 17:20:21 +00006926
6927 case Ist_Store:
sewardjcafe5052013-01-17 14:24:35 +00006928 do_origins_Store_plain( mce, st->Ist.Store.end,
6929 st->Ist.Store.addr,
6930 st->Ist.Store.data );
6931 break;
6932
6933 case Ist_StoreG:
6934 do_origins_StoreG( mce, st->Ist.StoreG.details );
6935 break;
6936
6937 case Ist_LoadG:
6938 do_origins_LoadG( mce, st->Ist.LoadG.details );
sewardjdb5907d2009-11-26 17:20:21 +00006939 break;
6940
6941 case Ist_LLSC: {
6942 /* In short: treat a load-linked like a normal load followed
6943 by an assignment of the loaded (shadow) data the result
6944 temporary. Treat a store-conditional like a normal store,
6945 and mark the result temporary as defined. */
6946 if (st->Ist.LLSC.storedata == NULL) {
6947 /* Load Linked */
6948 IRType resTy
6949 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
6950 IRExpr* vanillaLoad
6951 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
6952 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6953 || resTy == Ity_I16 || resTy == Ity_I8);
6954 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6955 schemeE(mce, vanillaLoad));
6956 } else {
6957 /* Store conditional */
sewardjcafe5052013-01-17 14:24:35 +00006958 do_origins_Store_plain( mce, st->Ist.LLSC.end,
6959 st->Ist.LLSC.addr,
6960 st->Ist.LLSC.storedata );
sewardjdb5907d2009-11-26 17:20:21 +00006961 /* For the rationale behind this, see comments at the
6962 place where the V-shadow for .result is constructed, in
6963 do_shadow_LLSC. In short, we regard .result as
6964 always-defined. */
6965 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6966 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00006967 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006968 break;
6969 }
sewardjdb5907d2009-11-26 17:20:21 +00006970
sewardj7cf4e6b2008-05-01 20:24:26 +00006971 case Ist_Put: {
6972 Int b_offset
6973 = MC_(get_otrack_shadow_offset)(
6974 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00006975 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00006976 );
6977 if (b_offset >= 0) {
6978 /* FIXME: this isn't an atom! */
6979 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
6980 schemeE( mce, st->Ist.Put.data )) );
6981 }
6982 break;
6983 }
sewardjdb5907d2009-11-26 17:20:21 +00006984
sewardj7cf4e6b2008-05-01 20:24:26 +00006985 case Ist_WrTmp:
6986 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
6987 schemeE(mce, st->Ist.WrTmp.data) );
6988 break;
sewardjdb5907d2009-11-26 17:20:21 +00006989
sewardj7cf4e6b2008-05-01 20:24:26 +00006990 case Ist_MBE:
6991 case Ist_NoOp:
6992 case Ist_Exit:
6993 case Ist_IMark:
6994 break;
sewardjdb5907d2009-11-26 17:20:21 +00006995
sewardj7cf4e6b2008-05-01 20:24:26 +00006996 default:
6997 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
6998 ppIRStmt(st);
6999 VG_(tool_panic)("memcheck:schemeS");
7000 }
7001}
7002
7003
njn25e49d8e72002-09-23 09:36:25 +00007004/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00007005/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00007006/*--------------------------------------------------------------------*/