blob: 74d4ca598f5b4cd4654618d6eb4d209c6c78f206 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2000-2012 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
106 historical reasons.) [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardjb9e6d242013-05-11 13:42:08 +0000122/* Generation of addr-definedness, addr-validity and
123 guard-definedness checks pertaining to loads and stores (Iex_Load,
124 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
125 loads/stores) was re-checked 11 May 2013. */
126
sewardj95448072004-11-22 20:19:51 +0000127/*------------------------------------------------------------*/
128/*--- Forward decls ---*/
129/*------------------------------------------------------------*/
130
131struct _MCEnv;
132
sewardj7cf4e6b2008-05-01 20:24:26 +0000133static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000134static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000135static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000136
sewardjb5b87402011-03-07 16:05:35 +0000137static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000138
139/*------------------------------------------------------------*/
140/*--- Memcheck running state, and tmp management. ---*/
141/*------------------------------------------------------------*/
142
sewardj1c0ce7a2009-07-01 08:10:49 +0000143/* Carries info about a particular tmp. The tmp's number is not
144 recorded, as this is implied by (equal to) its index in the tmpMap
145 in MCEnv. The tmp's type is also not recorded, as this is present
146 in MCEnv.sb->tyenv.
147
148 When .kind is Orig, .shadowV and .shadowB may give the identities
149 of the temps currently holding the associated definedness (shadowV)
150 and origin (shadowB) values, or these may be IRTemp_INVALID if code
151 to compute such values has not yet been emitted.
152
   When .kind is VSh or BSh then the tmp holds a V- or B- value,
154 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
155 illogical for a shadow tmp itself to be shadowed.
156*/
/* The role of a temp: an original-program temp (Orig), a
   definedness-shadow temp (VSh), or an origin-shadow temp (BSh). */
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

/* Per-temp record held in MCEnv.tmpMap; see the invariants described
   in the comment preceding TempKind. */
typedef
   struct {
      TempKind kind;     /* role of this tmp */
      IRTemp   shadowV;  /* definedness (V-) shadow, or IRTemp_INVALID */
      IRTemp   shadowB;  /* origin (B-) shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
168
169
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      /* MODIFIED: if True, every statement added via stmt() is also
         pretty-printed, for debugging. */
      Bool trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
214
215/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
216 demand), as they are encountered. This is for two reasons.
217
218 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.
221
222 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
223 table indexed [0 .. n_types-1], which gives the current shadow for
224 each original tmp, or INVALID_IRTEMP if none is so far assigned.
225 It is necessary to support making multiple assignments to a shadow
226 -- specifically, after testing a shadow for definedness, it needs
227 to be made defined. But IR's SSA property disallows this.
228
229 (2) (more important reason): Therefore, when a shadow needs to get
230 a new value, a new temporary is created, the value is assigned to
231 that, and the tmpMap is updated to reflect the new binding.
232
233 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000234 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000235 there's a read-before-write error in the original tmps. The IR
236 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000237*/
sewardj95448072004-11-22 20:19:51 +0000238
sewardj1c0ce7a2009-07-01 08:10:49 +0000239/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
240 both the table in mce->sb and to our auxiliary mapping. Note that
241 newTemp may cause mce->tmpMap to resize, hence previous results
242 from VG_(indexXA)(mce->tmpMap) are invalidated. */
243static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
244{
245 Word newIx;
246 TempMapEnt ent;
247 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
248 ent.kind = kind;
249 ent.shadowV = IRTemp_INVALID;
250 ent.shadowB = IRTemp_INVALID;
251 newIx = VG_(addToXA)( mce->tmpMap, &ent );
252 tl_assert(newIx == (Word)tmp);
253 return tmp;
254}
255
256
sewardj95448072004-11-22 20:19:51 +0000257/* Find the tmp currently shadowing the given original tmp. If none
258 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000259static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000260{
sewardj1c0ce7a2009-07-01 08:10:49 +0000261 TempMapEnt* ent;
262 /* VG_(indexXA) range-checks 'orig', hence no need to check
263 here. */
264 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
265 tl_assert(ent->kind == Orig);
266 if (ent->shadowV == IRTemp_INVALID) {
267 IRTemp tmpV
268 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
269 /* newTemp may cause mce->tmpMap to resize, hence previous results
270 from VG_(indexXA) are invalid. */
271 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
272 tl_assert(ent->kind == Orig);
273 tl_assert(ent->shadowV == IRTemp_INVALID);
274 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000275 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000276 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000277}
278
sewardj95448072004-11-22 20:19:51 +0000279/* Allocate a new shadow for the given original tmp. This means any
280 previous shadow is abandoned. This is needed because it is
281 necessary to give a new value to a shadow once it has been tested
282 for undefinedness, but unfortunately IR's SSA property disallows
283 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000284 and use that instead.
285
286 This is the same as findShadowTmpV, except we don't bother to see
287 if a shadow temp already existed -- we simply allocate a new one
288 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000289static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000290{
sewardj1c0ce7a2009-07-01 08:10:49 +0000291 TempMapEnt* ent;
292 /* VG_(indexXA) range-checks 'orig', hence no need to check
293 here. */
294 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
295 tl_assert(ent->kind == Orig);
296 if (1) {
297 IRTemp tmpV
298 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
299 /* newTemp may cause mce->tmpMap to resize, hence previous results
300 from VG_(indexXA) are invalid. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 ent->shadowV = tmpV;
304 }
sewardj95448072004-11-22 20:19:51 +0000305}
306
307
308/*------------------------------------------------------------*/
309/*--- IRAtoms -- a subset of IRExprs ---*/
310/*------------------------------------------------------------*/
311
312/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000313 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000314 input, most of this code deals in atoms. Usefully, a value atom
315 always has a V-value which is also an atom: constants are shadowed
316 by constants, and temps are shadowed by the corresponding shadow
317 temporary. */
318
319typedef IRExpr IRAtom;
320
321/* (used for sanity checks only): is this an atom which looks
322 like it's from original code? */
323static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
324{
325 if (a1->tag == Iex_Const)
326 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000327 if (a1->tag == Iex_RdTmp) {
328 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
329 return ent->kind == Orig;
330 }
sewardj95448072004-11-22 20:19:51 +0000331 return False;
332}
333
334/* (used for sanity checks only): is this an atom which looks
335 like it's from shadow code? */
336static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
337{
338 if (a1->tag == Iex_Const)
339 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000340 if (a1->tag == Iex_RdTmp) {
341 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
342 return ent->kind == VSh || ent->kind == BSh;
343 }
sewardj95448072004-11-22 20:19:51 +0000344 return False;
345}
346
347/* (used for sanity checks only): check that both args are atoms and
348 are identically-kinded. */
349static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
350{
sewardj0b9d74a2006-12-24 02:24:11 +0000351 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000352 return True;
sewardjbef552a2005-08-30 12:54:36 +0000353 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000354 return True;
355 return False;
356}
357
358
359/*------------------------------------------------------------*/
360/*--- Type management ---*/
361/*------------------------------------------------------------*/
362
363/* Shadow state is always accessed using integer types. This returns
364 an integer type with the same size (as per sizeofIRType) as the
365 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000366 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000367
sewardj7cf4e6b2008-05-01 20:24:26 +0000368static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000369{
370 switch (ty) {
371 case Ity_I1:
372 case Ity_I8:
373 case Ity_I16:
374 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000375 case Ity_I64:
376 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000377 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000378 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000379 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000380 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000381 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000382 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000383 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000384 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000385 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000386 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000387 }
388}
389
390/* Produce a 'defined' value of the given shadow type. Should only be
391 supplied shadow types (Bit/I8/I16/I32/UI64). */
392static IRExpr* definedOfType ( IRType ty ) {
393 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000394 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
395 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
396 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
397 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
398 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000399 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000400 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000401 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000402 }
403}
404
405
sewardj95448072004-11-22 20:19:51 +0000406/*------------------------------------------------------------*/
407/*--- Constructing IR fragments ---*/
408/*------------------------------------------------------------*/
409
sewardj95448072004-11-22 20:19:51 +0000410/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000411static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
412 if (mce->trace) {
413 VG_(printf)(" %c: ", cat);
414 ppIRStmt(st);
415 VG_(printf)("\n");
416 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000417 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000418}
419
420/* assign value to tmp */
421static inline
422void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000423 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000424}
sewardj95448072004-11-22 20:19:51 +0000425
426/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000427#define triop(_op, _arg1, _arg2, _arg3) \
428 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000429#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
430#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
sewardjcc961652013-01-26 11:49:15 +0000431#define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
sewardj95448072004-11-22 20:19:51 +0000432#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
433#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
434#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
435#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000436#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000437#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000438
sewardj7cf4e6b2008-05-01 20:24:26 +0000439/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000440 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000441 an atom.
442
443 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000444 needs to be. But passing it in is redundant, since we can deduce
445 the type merely by inspecting 'e'. So at least use that fact to
446 assert that the two types agree. */
447static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
448{
449 TempKind k;
450 IRTemp t;
451 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000452
sewardj7cf4e6b2008-05-01 20:24:26 +0000453 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000454 switch (cat) {
455 case 'V': k = VSh; break;
456 case 'B': k = BSh; break;
457 case 'C': k = Orig; break;
458 /* happens when we are making up new "orig"
459 expressions, for IRCAS handling */
460 default: tl_assert(0);
461 }
462 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000463 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000464 return mkexpr(t);
465}
466
467
468/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000469/*--- Helper functions for 128-bit ops ---*/
470/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000471
sewardjb5b87402011-03-07 16:05:35 +0000472static IRExpr *i128_const_zero(void)
473{
sewardj45fa9f42012-05-21 10:18:10 +0000474 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
475 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000476}
477
sewardj45fa9f42012-05-21 10:18:10 +0000478/* There are no I128-bit loads and/or stores [as generated by any
479 current front ends]. So we do not need to worry about that in
480 expr2vbits_Load */
481
sewardjb5b87402011-03-07 16:05:35 +0000482
483/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000484/*--- Constructing definedness primitive ops ---*/
485/*------------------------------------------------------------*/
486
487/* --------- Defined-if-either-defined --------- */
488
489static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000492 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000493}
494
495static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000498 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000499}
500
501static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000504 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000505}
506
sewardj7010f6e2004-12-10 13:35:22 +0000507static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000510 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000511}
512
sewardj20d38f22005-02-07 23:50:18 +0000513static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000516 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000517}
518
sewardj350e8f72012-06-25 07:52:15 +0000519static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
520 tl_assert(isShadowAtom(mce,a1));
521 tl_assert(isShadowAtom(mce,a2));
522 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
523}
524
sewardj95448072004-11-22 20:19:51 +0000525/* --------- Undefined-if-either-undefined --------- */
526
527static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000536 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000537}
538
539static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000542 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000543}
544
545static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 tl_assert(isShadowAtom(mce,a1));
547 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000548 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000549}
550
sewardjb5b87402011-03-07 16:05:35 +0000551static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
552 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
553 tl_assert(isShadowAtom(mce,a1));
554 tl_assert(isShadowAtom(mce,a2));
555 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
556 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
557 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
558 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
559 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
560 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
561
562 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
563}
564
sewardj20d38f22005-02-07 23:50:18 +0000565static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000568 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000569}
570
sewardj350e8f72012-06-25 07:52:15 +0000571static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
572 tl_assert(isShadowAtom(mce,a1));
573 tl_assert(isShadowAtom(mce,a2));
574 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
575}
576
sewardje50a1b12004-12-17 01:24:54 +0000577static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000578 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000579 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000580 case Ity_I16: return mkUifU16(mce, a1, a2);
581 case Ity_I32: return mkUifU32(mce, a1, a2);
582 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000583 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000584 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardja2f30952013-03-27 11:40:02 +0000585 case Ity_V256: return mkUifUV256(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000586 default:
587 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
588 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000589 }
590}
591
sewardj95448072004-11-22 20:19:51 +0000592/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000593
sewardj95448072004-11-22 20:19:51 +0000594static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
595 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000596 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000597}
598
599static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
600 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000601 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000602}
603
604static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
605 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000606 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000607}
608
sewardj681be302005-01-15 20:43:58 +0000609static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
610 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000611 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000612}
613
sewardj95448072004-11-22 20:19:51 +0000614/* --------- 'Improvement' functions for AND/OR. --------- */
615
616/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
617 defined (0); all other -> undefined (1).
618*/
619static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000620{
sewardj95448072004-11-22 20:19:51 +0000621 tl_assert(isOriginalAtom(mce, data));
622 tl_assert(isShadowAtom(mce, vbits));
623 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000624 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000625}
njn25e49d8e72002-09-23 09:36:25 +0000626
sewardj95448072004-11-22 20:19:51 +0000627static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
628{
629 tl_assert(isOriginalAtom(mce, data));
630 tl_assert(isShadowAtom(mce, vbits));
631 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000632 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000633}
njn25e49d8e72002-09-23 09:36:25 +0000634
sewardj95448072004-11-22 20:19:51 +0000635static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
636{
637 tl_assert(isOriginalAtom(mce, data));
638 tl_assert(isShadowAtom(mce, vbits));
639 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000640 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000641}
njn25e49d8e72002-09-23 09:36:25 +0000642
sewardj7010f6e2004-12-10 13:35:22 +0000643static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
644{
645 tl_assert(isOriginalAtom(mce, data));
646 tl_assert(isShadowAtom(mce, vbits));
647 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000648 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000649}
650
sewardj20d38f22005-02-07 23:50:18 +0000651static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000652{
653 tl_assert(isOriginalAtom(mce, data));
654 tl_assert(isShadowAtom(mce, vbits));
655 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000656 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000657}
658
sewardj350e8f72012-06-25 07:52:15 +0000659static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
660{
661 tl_assert(isOriginalAtom(mce, data));
662 tl_assert(isShadowAtom(mce, vbits));
663 tl_assert(sameKindedAtoms(data, vbits));
664 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
665}
666
sewardj95448072004-11-22 20:19:51 +0000667/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
668 defined (0); all other -> undefined (1).
669*/
670static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
671{
672 tl_assert(isOriginalAtom(mce, data));
673 tl_assert(isShadowAtom(mce, vbits));
674 tl_assert(sameKindedAtoms(data, vbits));
675 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000676 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000677 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000678 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000679 vbits) );
680}
njn25e49d8e72002-09-23 09:36:25 +0000681
sewardj95448072004-11-22 20:19:51 +0000682static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
683{
684 tl_assert(isOriginalAtom(mce, data));
685 tl_assert(isShadowAtom(mce, vbits));
686 tl_assert(sameKindedAtoms(data, vbits));
687 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000688 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000689 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000690 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000691 vbits) );
692}
njn25e49d8e72002-09-23 09:36:25 +0000693
sewardj95448072004-11-22 20:19:51 +0000694static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
695{
696 tl_assert(isOriginalAtom(mce, data));
697 tl_assert(isShadowAtom(mce, vbits));
698 tl_assert(sameKindedAtoms(data, vbits));
699 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000700 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000701 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000702 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000703 vbits) );
704}
705
sewardj7010f6e2004-12-10 13:35:22 +0000706static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
707{
708 tl_assert(isOriginalAtom(mce, data));
709 tl_assert(isShadowAtom(mce, vbits));
710 tl_assert(sameKindedAtoms(data, vbits));
711 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000712 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000713 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000714 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000715 vbits) );
716}
717
sewardj20d38f22005-02-07 23:50:18 +0000718static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000719{
720 tl_assert(isOriginalAtom(mce, data));
721 tl_assert(isShadowAtom(mce, vbits));
722 tl_assert(sameKindedAtoms(data, vbits));
723 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000724 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000725 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000726 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000727 vbits) );
728}
729
sewardj350e8f72012-06-25 07:52:15 +0000730static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
731{
732 tl_assert(isOriginalAtom(mce, data));
733 tl_assert(isShadowAtom(mce, vbits));
734 tl_assert(sameKindedAtoms(data, vbits));
735 return assignNew(
736 'V', mce, Ity_V256,
737 binop(Iop_OrV256,
738 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
739 vbits) );
740}
741
sewardj95448072004-11-22 20:19:51 +0000742/* --------- Pessimising casts. --------- */
743
sewardjb5b87402011-03-07 16:05:35 +0000744/* The function returns an expression of type DST_TY. If any of the VBITS
745 is undefined (value == 1) the resulting expression has all bits set to
746 1. Otherwise, all bits are 0. */
747
/* Pessimising cast: produce a value of (shadow) type dst_ty which is
   all-ones if any bit of vbits is undefined (1), and all-zeroes
   otherwise. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit: 1 iff any
      source bit is 1 (undefined). */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         /* Already a single bit; nothing to collapse. */
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen the single bit up to the dst type by sign-extension:
      1 -> all-ones, 0 -> all-zeroes. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to I64, then duplicate the 64-bit half twice. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         /* Widen to I64, duplicate to V128, then duplicate to V256. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
843
sewardjd5204dc2004-12-31 01:16:11 +0000844/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
845/*
846 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
847 PCasting to Ity_U1. However, sometimes it is necessary to be more
848 accurate. The insight is that the result is defined if two
849 corresponding bits can be found, one from each argument, so that
850 both bits are defined but are different -- that makes EQ say "No"
851 and NE say "Yes". Hence, we compute an improvement term and DifD
852 it onto the "normal" (UifU) result.
853
854 The result is:
855
856 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000857 -- naive version
858 PCastTo<sz>( UifU<sz>(vxx, vyy) )
859
sewardjd5204dc2004-12-31 01:16:11 +0000860 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000861
862 -- improvement term
863 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000864 )
sewardje6f8af42005-07-06 18:48:59 +0000865
sewardjd5204dc2004-12-31 01:16:11 +0000866 where
867 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000868 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000869
sewardje6f8af42005-07-06 18:48:59 +0000870 vec = Or<sz>( vxx, // 0 iff bit defined
871 vyy, // 0 iff bit defined
872 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
873 )
874
875 If any bit of vec is 0, the result is defined and so the
876 improvement term should produce 0...0, else it should produce
877 1...1.
878
879 Hence require for the improvement term:
880
881 if vec == 1...1 then 1...1 else 0...0
882 ->
883 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
884
885 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000886*/
/* Accurate CmpEQ/CmpNE interpretation (see long comment above):
   combine the naive UifU-based result with an improvement term that
   detects a pair of defined-but-different bits, which makes the
   comparison outcome defined.  Returns an Ity_I1 shadow atom. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the size-specific ops and the all-ones constant. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast(vxx `UifU` vyy): undefined if either arg has any
      undefined bits. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec = vxx | vyy | ~(xx ^ yy): a 0 bit marks a position where
      both args are defined and differ. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement = PCast(vec == 1...1): all-zeroes (defined) if any
      such bit pair exists, else all-ones. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
961
sewardj95448072004-11-22 20:19:51 +0000962
sewardj992dff92005-10-07 11:08:55 +0000963/* --------- Semi-accurate interpretation of CmpORD. --------- */
964
965/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
966
967 CmpORD32S(x,y) = 1<<3 if x <s y
968 = 1<<2 if x >s y
969 = 1<<1 if x == y
970
971 and similarly the unsigned variant. The default interpretation is:
972
973 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000974 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000975
976 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
977 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000978
979 Also deal with a special case better:
980
981 CmpORD32S(x,0)
982
983 Here, bit 3 (LT) of the result is a copy of the top bit of x and
984 will be defined even if the rest of x isn't. In which case we do:
985
986 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000987 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
988 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000989
sewardj1bc82102005-12-23 00:16:24 +0000990 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000991*/
sewardja9e62a92005-10-07 12:13:21 +0000992static Bool isZeroU32 ( IRAtom* e )
993{
994 return
995 toBool( e->tag == Iex_Const
996 && e->Iex.Const.con->tag == Ico_U32
997 && e->Iex.Const.con->Ico.U32 == 0 );
998}
999
sewardj1bc82102005-12-23 00:16:24 +00001000static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +00001001{
sewardj1bc82102005-12-23 00:16:24 +00001002 return
1003 toBool( e->tag == Iex_Const
1004 && e->Iex.Const.con->tag == Ico_U64
1005 && e->Iex.Const.con->Ico.U64 == 0 );
1006}
1007
/* Shadow a PowerPC-style 3-way compare (CmpORD32/64{S,U}).  See the
   comment above for the interpretation being implemented here. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Select 32- vs 64-bit ops from the comparison op itself. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64 : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(xx#) & (3<<1))            -- GT#,EQ# from the PCast
         | ((xx# >>u width-1) << 3)       -- LT# is just xx#'s top bit */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* PCast(xx# `UifU` yy#) & (7<<1): only bits 3,2,1 of the result
         can be nonzero; the rest are defined (zero). */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1078
1079
sewardj95448072004-11-22 20:19:51 +00001080/*------------------------------------------------------------*/
1081/*--- Emit a test and complaint if something is undefined. ---*/
1082/*------------------------------------------------------------*/
1083
sewardj7cf4e6b2008-05-01 20:24:26 +00001084static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1085
1086
sewardj95448072004-11-22 20:19:51 +00001087/* Set the annotations on a dirty helper to indicate that the stack
1088 pointer and instruction pointers might be read. This is the
1089 behaviour of all 'emit-a-complaint' style functions we might
1090 call. */
1091
1092static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1093 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001094 di->fxState[0].fx = Ifx_Read;
1095 di->fxState[0].offset = mce->layout->offset_SP;
1096 di->fxState[0].size = mce->layout->sizeof_SP;
1097 di->fxState[0].nRepeats = 0;
1098 di->fxState[0].repeatLen = 0;
1099 di->fxState[1].fx = Ifx_Read;
1100 di->fxState[1].offset = mce->layout->offset_IP;
1101 di->fxState[1].size = mce->layout->sizeof_IP;
1102 di->fxState[1].nRepeats = 0;
1103 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001104}
1105
1106
sewardjcafe5052013-01-17 14:24:35 +00001107/* Check the supplied *original* |atom| for undefinedness, and emit a
sewardj95448072004-11-22 20:19:51 +00001108 complaint if so. Once that happens, mark it as defined. This is
1109 possible because the atom is either a tmp or literal. If it's a
1110 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1111 be defined. In fact as mentioned above, we will have to allocate a
1112 new tmp to carry the new 'defined' shadow value, and update the
1113 original->tmp mapping accordingly; we cannot simply assign a new
sewardjcafe5052013-01-17 14:24:35 +00001114 value to an existing shadow tmp as this breaks SSAness.
1115
sewardjb9e6d242013-05-11 13:42:08 +00001116 The checks are performed, any resulting complaint emitted, and
1117 |atom|'s shadow temp set to 'defined', ONLY in the case that
1118 |guard| evaluates to True at run-time. If it evaluates to False
1119 then no action is performed. If |guard| is NULL (the usual case)
1120 then it is assumed to be always-true, and hence these actions are
1121 performed unconditionally.
1122
1123 This routine does not generate code to check the definedness of
1124 |guard|. The caller is assumed to have taken care of that already.
sewardj95448072004-11-22 20:19:51 +00001125*/
/* See the long comment above for the full contract.  In short: emit a
   run-time check of |atom|'s definedness (guarded by |guard| if
   non-NULL), call the appropriate complaint helper if it is undefined,
   and then mark |atom|'s shadow as defined. */
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32-bit; widen for a 64-bit host word. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by value size, and by whether an
      origin is being carried along. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* Sizes with no dedicated helper: pass sz explicitly. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1302
1303
1304/*------------------------------------------------------------*/
1305/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1306/*------------------------------------------------------------*/
1307
1308/* Examine the always-defined sections declared in layout to see if
1309 the (offset,size) section is within one. Note, is is an error to
1310 partially fall into such a region: (offset,size) should either be
1311 completely in such a region or completely not-in such a region.
1312*/
1313static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1314{
1315 Int minoffD, maxoffD, i;
1316 Int minoff = offset;
1317 Int maxoff = minoff + size - 1;
1318 tl_assert((minoff & ~0xFFFF) == 0);
1319 tl_assert((maxoff & ~0xFFFF) == 0);
1320
1321 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1322 minoffD = mce->layout->alwaysDefd[i].offset;
1323 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1324 tl_assert((minoffD & ~0xFFFF) == 0);
1325 tl_assert((maxoffD & ~0xFFFF) == 0);
1326
1327 if (maxoff < minoffD || maxoffD < minoff)
1328 continue; /* no overlap */
1329 if (minoff >= minoffD && maxoff <= maxoffD)
1330 return True; /* completely contained in an always-defd section */
1331
1332 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1333 }
1334 return False; /* could not find any containing section */
1335}
1336
1337
1338/* Generate into bb suitable actions to shadow this Put. If the state
1339 slice is marked 'always defined', do nothing. Otherwise, write the
1340 supplied V bits to the shadow state. We can pass in either an
1341 original atom or a V-atom, but not both. In the former case the
1342 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001343 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001344*/
/* Shadow a Put to the guest state.  Exactly one of |atom| (an original
   atom, whose V-bits we compute) or |vatom| (an already-computed
   V-atom) must be non-NULL.  If |guard| is non-NULL the Put only takes
   effect when it evaluates to True; its definedness is assumed to have
   been checked by the caller. */
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         /* Shadow state lives at offset + total_sizeB from the start. */
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1388
1389
/* Generate into bb suitable actions to shadow this PutI.  If the
   referenced state slice is marked 'always defined', do nothing;
   otherwise write the data's V bits to the shadow state area.
*/
1393static
floriand39b0222012-05-31 15:48:13 +00001394void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001395{
sewardj7cf97ee2004-11-28 14:25:01 +00001396 IRAtom* vatom;
1397 IRType ty, tyS;
1398 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001399 IRRegArray* descr = puti->descr;
1400 IRAtom* ix = puti->ix;
1401 Int bias = puti->bias;
1402 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001403
njn1d0825f2006-03-27 11:37:07 +00001404 // Don't do shadow PUTIs if we're not doing undefined value checking.
1405 // Their absence lets Vex's optimiser remove all the shadow computation
1406 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001407 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001408 return;
1409
sewardj95448072004-11-22 20:19:51 +00001410 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001411 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001412 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001413 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001414 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001415 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001416 tl_assert(ty != Ity_I1);
1417 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001418 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001419 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1420 /* later: no ... */
1421 /* emit code to emit a complaint if any of the vbits are 1. */
1422 /* complainIfUndefined(mce, atom); */
1423 } else {
1424 /* Do a cloned version of the Put that refers to the shadow
1425 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001426 IRRegArray* new_descr
1427 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1428 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001429 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001430 }
1431}
1432
1433
1434/* Return an expression which contains the V bits corresponding to the
1435 given GET (passed in in pieces).
1436*/
1437static
1438IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1439{
sewardj7cf4e6b2008-05-01 20:24:26 +00001440 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001441 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001442 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001443 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1444 /* Always defined, return all zeroes of the relevant type */
1445 return definedOfType(tyS);
1446 } else {
1447 /* return a cloned version of the Get that refers to the shadow
1448 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001449 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001450 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1451 }
1452}
1453
1454
1455/* Return an expression which contains the V bits corresponding to the
1456 given GETI (passed in in pieces).
1457*/
1458static
sewardj0b9d74a2006-12-24 02:24:11 +00001459IRExpr* shadow_GETI ( MCEnv* mce,
1460 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001461{
1462 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001463 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001464 Int arrSize = descr->nElems * sizeofIRType(ty);
1465 tl_assert(ty != Ity_I1);
1466 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001467 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001468 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1469 /* Always defined, return all zeroes of the relevant type */
1470 return definedOfType(tyS);
1471 } else {
1472 /* return a cloned version of the Get that refers to the shadow
1473 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001474 IRRegArray* new_descr
1475 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1476 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001477 return IRExpr_GetI( new_descr, ix, bias );
1478 }
1479}
1480
1481
1482/*------------------------------------------------------------*/
1483/*--- Generating approximations for unknown operations, ---*/
1484/*--- using lazy-propagate semantics ---*/
1485/*------------------------------------------------------------*/
1486
1487/* Lazy propagation of undefinedness from two values, resulting in the
1488 specified shadow type.
1489*/
1490static
1491IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1492{
sewardj95448072004-11-22 20:19:51 +00001493 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001494 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1495 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001496 tl_assert(isShadowAtom(mce,va1));
1497 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001498
1499 /* The general case is inefficient because PCast is an expensive
1500 operation. Here are some special cases which use PCast only
1501 once rather than twice. */
1502
1503 /* I64 x I64 -> I64 */
1504 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1505 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1506 at = mkUifU(mce, Ity_I64, va1, va2);
1507 at = mkPCastTo(mce, Ity_I64, at);
1508 return at;
1509 }
1510
1511 /* I64 x I64 -> I32 */
1512 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1513 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1514 at = mkUifU(mce, Ity_I64, va1, va2);
1515 at = mkPCastTo(mce, Ity_I32, at);
1516 return at;
1517 }
1518
1519 if (0) {
1520 VG_(printf)("mkLazy2 ");
1521 ppIRType(t1);
1522 VG_(printf)("_");
1523 ppIRType(t2);
1524 VG_(printf)("_");
1525 ppIRType(finalVty);
1526 VG_(printf)("\n");
1527 }
1528
1529 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001530 at = mkPCastTo(mce, Ity_I32, va1);
1531 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1532 at = mkPCastTo(mce, finalVty, at);
1533 return at;
1534}
1535
1536
/* 3-arg version of the above: pessimistically combine three shadow
   values into a single V-bit value of type finalVty.  Only the type
   combinations actually emitted by the instrumenter are handled; any
   other combination is printed and then asserted against. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* Unhandled type combination: abort. */
   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1669
1670
/* 4-arg version of the above: pessimistically combine four shadow
   values into a single V-bit value of type finalVty.  Only the type
   combinations actually emitted by the instrumenter are handled; any
   other combination is printed and then asserted against. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* Unhandled type combination: abort (does not return). */
   tl_assert(0);
}
1737
1738
sewardj95448072004-11-22 20:19:51 +00001739/* Do the lazy propagation game from a null-terminated vector of
1740 atoms. This is presumably the arguments to a helper call, so the
1741 IRCallee info is also supplied in order that we can know which
1742 arguments should be ignored (via the .mcx_mask field).
1743*/
static
IRAtom* mkLazyN ( MCEnv* mce, 
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   /* See the header comment above: lazily merge the definedness of a
      NULL-terminated vector of helper-call arguments, skipping those
      the callee's mcx_mask marks as excluded. */
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);   /* mcx_mask is tested with (1<<i) below */
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64 
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
1789
1790
1791/*------------------------------------------------------------*/
1792/*--- Generating expensive sequences for exact carry-chain ---*/
1793/*--- propagation in add/sub and related operations. ---*/
1794/*------------------------------------------------------------*/
1795
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb, 
                          IRAtom* aa,  IRAtom* bb )
{
   /* Exact (carry-chain-aware) definedness for add/sub.  aa/bb are
      the original operands, qaa/qbb their V bits (1 = undefined).
      The scheme: compute the operation on the smallest and largest
      values each operand could take (undefined bits forced to 0 and
      to 1 respectively); result bits that differ between the two
      outcomes, plus bits undefined in either input, are marked
      undefined. */
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the width-appropriate primops; only I32 and I64 are
      supported. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa  (undefined bits of aa forced to 0)
   a_min = assignNew('V', mce,ty, 
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb  (undefined bits of bb forced to 0)
   b_min = assignNew('V', mce,ty, 
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa  (undefined bits of aa forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb  (undefined bits of bb forced to 1)
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty, 
                   binop( opXOR, 
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // For subtraction the extremes pair oppositely:
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty, 
                   binop( opXOR, 
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}
1881
1882
sewardj4cfa81b2012-11-08 10:58:16 +00001883static
1884IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1885 IRAtom* atom, IRAtom* vatom )
1886{
1887 IRType ty;
1888 IROp xorOp, subOp, andOp;
1889 IRExpr *one;
1890 IRAtom *improver, *improved;
1891 tl_assert(isShadowAtom(mce,vatom));
1892 tl_assert(isOriginalAtom(mce,atom));
1893 tl_assert(sameKindedAtoms(atom,vatom));
1894
1895 switch (czop) {
1896 case Iop_Ctz32:
1897 ty = Ity_I32;
1898 xorOp = Iop_Xor32;
1899 subOp = Iop_Sub32;
1900 andOp = Iop_And32;
1901 one = mkU32(1);
1902 break;
1903 case Iop_Ctz64:
1904 ty = Ity_I64;
1905 xorOp = Iop_Xor64;
1906 subOp = Iop_Sub64;
1907 andOp = Iop_And64;
1908 one = mkU64(1);
1909 break;
1910 default:
1911 ppIROp(czop);
1912 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1913 }
1914
1915 // improver = atom ^ (atom - 1)
1916 //
1917 // That is, improver has its low ctz(atom) bits equal to one;
1918 // higher bits (if any) equal to zero.
1919 improver = assignNew('V', mce,ty,
1920 binop(xorOp,
1921 atom,
1922 assignNew('V', mce, ty,
1923 binop(subOp, atom, one))));
1924
1925 // improved = vatom & improver
1926 //
1927 // That is, treat any V bits above the first ctz(atom) bits as
1928 // "defined".
1929 improved = assignNew('V', mce, ty,
1930 binop(andOp, vatom, improver));
1931
1932 // Return pessimizing cast of improved.
1933 return mkPCastTo(mce, ty, improved);
1934}
1935
1936
sewardj95448072004-11-22 20:19:51 +00001937/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001938/*--- Scalar shifts. ---*/
1939/*------------------------------------------------------------*/
1940
1941/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1942 idea is to shift the definedness bits by the original shift amount.
1943 This introduces 0s ("defined") in new positions for left shifts and
1944 unsigned right shifts, and copies the top definedness bit for
1945 signed right shifts. So, conveniently, applying the original shift
1946 operator to the definedness bits for the left arg is exactly the
1947 right thing to do:
1948
1949 (qaa << bb)
1950
1951 However if the shift amount is undefined then the whole result
1952 is undefined. Hence need:
1953
1954 (qaa << bb) `UifU` PCast(qbb)
1955
1956 If the shift amount bb is a literal than qbb will say 'all defined'
1957 and the UifU and PCast will get folded out by post-instrumentation
1958 optimisation.
1959*/
1960static IRAtom* scalarShift ( MCEnv* mce,
1961 IRType ty,
1962 IROp original_op,
1963 IRAtom* qaa, IRAtom* qbb,
1964 IRAtom* aa, IRAtom* bb )
1965{
1966 tl_assert(isShadowAtom(mce,qaa));
1967 tl_assert(isShadowAtom(mce,qbb));
1968 tl_assert(isOriginalAtom(mce,aa));
1969 tl_assert(isOriginalAtom(mce,bb));
1970 tl_assert(sameKindedAtoms(qaa,aa));
1971 tl_assert(sameKindedAtoms(qbb,bb));
1972 return
1973 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001974 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001975 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001976 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001977 mkPCastTo(mce, ty, qbb)
1978 )
1979 );
1980}
1981
1982
1983/*------------------------------------------------------------*/
1984/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001985/*------------------------------------------------------------*/
1986
sewardja1d93302004-12-12 16:45:06 +00001987/* Vector pessimisation -- pessimise within each lane individually. */
1988
1989static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1990{
sewardj7cf4e6b2008-05-01 20:24:26 +00001991 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001992}
1993
1994static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1995{
sewardj7cf4e6b2008-05-01 20:24:26 +00001996 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001997}
1998
1999static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2000{
sewardj7cf4e6b2008-05-01 20:24:26 +00002001 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00002002}
2003
2004static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2005{
sewardj7cf4e6b2008-05-01 20:24:26 +00002006 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00002007}
2008
sewardj350e8f72012-06-25 07:52:15 +00002009static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2010{
2011 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2012}
2013
2014static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2015{
2016 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2017}
2018
sewardjacd2e912005-01-13 19:17:06 +00002019static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2020{
sewardj7cf4e6b2008-05-01 20:24:26 +00002021 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00002022}
2023
sewardja2f30952013-03-27 11:40:02 +00002024static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2025{
2026 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2027}
2028
sewardjacd2e912005-01-13 19:17:06 +00002029static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2030{
sewardj7cf4e6b2008-05-01 20:24:26 +00002031 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00002032}
2033
sewardja2f30952013-03-27 11:40:02 +00002034static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2035{
2036 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2037}
2038
sewardjacd2e912005-01-13 19:17:06 +00002039static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2040{
sewardj7cf4e6b2008-05-01 20:24:26 +00002041 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00002042}
2043
sewardjc678b852010-09-22 00:58:51 +00002044static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2045{
2046 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2047}
2048
2049static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2050{
2051 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2052}
2053
sewardja1d93302004-12-12 16:45:06 +00002054
sewardj3245c912004-12-10 14:58:26 +00002055/* Here's a simple scheme capable of handling ops derived from SSE1
2056 code and while only generating ops that can be efficiently
2057 implemented in SSE1. */
2058
2059/* All-lanes versions are straightforward:
2060
sewardj20d38f22005-02-07 23:50:18 +00002061 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00002062
2063 unary32Fx4(x,y) ==> PCast32x4(x#)
2064
2065 Lowest-lane-only versions are more complex:
2066
sewardj20d38f22005-02-07 23:50:18 +00002067 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002068 x#,
sewardj20d38f22005-02-07 23:50:18 +00002069 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00002070 )
2071
2072 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00002073 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00002074 obvious scheme of taking the bottom 32 bits of each operand
2075 and doing a 32-bit UifU. Basically since UifU is fast and
2076 chopping lanes off vector values is slow.
2077
2078 Finally:
2079
sewardj20d38f22005-02-07 23:50:18 +00002080 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002081 x#,
sewardj20d38f22005-02-07 23:50:18 +00002082 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00002083 )
2084
2085 Where:
2086
2087 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2088 PCast32x4(v#) = CmpNEZ32x4(v#)
2089*/
2090
2091static
2092IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2093{
2094 IRAtom* at;
2095 tl_assert(isShadowAtom(mce, vatomX));
2096 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002097 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002098 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002099 return at;
2100}
2101
2102static
2103IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2104{
2105 IRAtom* at;
2106 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002107 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002108 return at;
2109}
2110
2111static
2112IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2113{
2114 IRAtom* at;
2115 tl_assert(isShadowAtom(mce, vatomX));
2116 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002117 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002118 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002119 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002120 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002121 return at;
2122}
2123
2124static
2125IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2126{
2127 IRAtom* at;
2128 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002129 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002130 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002131 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002132 return at;
2133}
2134
sewardj0b070592004-12-10 21:44:22 +00002135/* --- ... and ... 64Fx2 versions of the same ... --- */
2136
2137static
2138IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2139{
2140 IRAtom* at;
2141 tl_assert(isShadowAtom(mce, vatomX));
2142 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002143 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002144 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002145 return at;
2146}
2147
2148static
2149IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2150{
2151 IRAtom* at;
2152 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002153 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002154 return at;
2155}
2156
2157static
2158IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2159{
2160 IRAtom* at;
2161 tl_assert(isShadowAtom(mce, vatomX));
2162 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002163 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002164 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002165 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002166 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002167 return at;
2168}
2169
2170static
2171IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2172{
2173 IRAtom* at;
2174 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002175 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002176 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002177 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002178 return at;
2179}
2180
sewardj57f92b02010-08-22 11:54:14 +00002181/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2182
2183static
2184IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2185{
2186 IRAtom* at;
2187 tl_assert(isShadowAtom(mce, vatomX));
2188 tl_assert(isShadowAtom(mce, vatomY));
2189 at = mkUifU64(mce, vatomX, vatomY);
2190 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2191 return at;
2192}
2193
2194static
2195IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2196{
2197 IRAtom* at;
2198 tl_assert(isShadowAtom(mce, vatomX));
2199 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2200 return at;
2201}
2202
sewardj350e8f72012-06-25 07:52:15 +00002203/* --- ... and ... 64Fx4 versions of the same ... --- */
2204
2205static
2206IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2207{
2208 IRAtom* at;
2209 tl_assert(isShadowAtom(mce, vatomX));
2210 tl_assert(isShadowAtom(mce, vatomY));
2211 at = mkUifUV256(mce, vatomX, vatomY);
2212 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2213 return at;
2214}
2215
2216static
2217IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2218{
2219 IRAtom* at;
2220 tl_assert(isShadowAtom(mce, vatomX));
2221 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2222 return at;
2223}
2224
2225/* --- ... and ... 32Fx8 versions of the same ... --- */
2226
2227static
2228IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2229{
2230 IRAtom* at;
2231 tl_assert(isShadowAtom(mce, vatomX));
2232 tl_assert(isShadowAtom(mce, vatomY));
2233 at = mkUifUV256(mce, vatomX, vatomY);
2234 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2235 return at;
2236}
2237
2238static
2239IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2240{
2241 IRAtom* at;
2242 tl_assert(isShadowAtom(mce, vatomX));
2243 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2244 return at;
2245}
2246
sewardja1d93302004-12-12 16:45:06 +00002247/* --- --- Vector saturated narrowing --- --- */
2248
sewardjb5a29232011-10-22 09:29:41 +00002249/* We used to do something very clever here, but on closer inspection
2250 (2011-Jun-15), and in particular bug #279698, it turns out to be
2251 wrong. Part of the problem came from the fact that for a long
2252 time, the IR primops to do with saturated narrowing were
2253 underspecified and managed to confuse multiple cases which needed
2254 to be separate: the op names had a signedness qualifier, but in
2255 fact the source and destination signednesses needed to be specified
2256 independently, so the op names really need two independent
2257 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002258
sewardjb5a29232011-10-22 09:29:41 +00002259 As of 2011-Jun-15 (ish) the underspecification was sorted out
2260 properly. The incorrect instrumentation remained, though. That
2261 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002262
sewardjb5a29232011-10-22 09:29:41 +00002263 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002264
sewardjb5a29232011-10-22 09:29:41 +00002265 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2266 number of lanes, X is the source lane width and signedness, and Y
2267 is the destination lane width and signedness. In all cases the
2268 destination lane width is half the source lane width, so the names
2269 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002270
sewardjb5a29232011-10-22 09:29:41 +00002271 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2272 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002273
sewardjb5a29232011-10-22 09:29:41 +00002274 Let Vanilla(OP) be a function that takes OP, one of these
2275 saturating narrowing ops, and produces the same "shaped" narrowing
2276 op which is not saturating, but merely dumps the most significant
2277 bits. "same shape" means that the lane numbers and widths are the
2278 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002279
sewardjb5a29232011-10-22 09:29:41 +00002280 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2281 = Iop_NarrowBin32to16x8,
2282 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2283 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002284
sewardjb5a29232011-10-22 09:29:41 +00002285 So, with that in place, the scheme is simple, and it is simple to
2286 pessimise each lane individually and then apply Vanilla(OP) so as
2287 to get the result in the right "shape". If the original OP is
2288 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002289
sewardjb5a29232011-10-22 09:29:41 +00002290 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002291
sewardjb5a29232011-10-22 09:29:41 +00002292 or for the case when OP is unary (Iop_QNarrowUn*)
2293
2294 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002295*/
2296static
sewardjb5a29232011-10-22 09:29:41 +00002297IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2298{
2299 switch (qnarrowOp) {
2300 /* Binary: (128, 128) -> 128 */
2301 case Iop_QNarrowBin16Sto8Ux16:
2302 case Iop_QNarrowBin16Sto8Sx16:
2303 case Iop_QNarrowBin16Uto8Ux16:
2304 return Iop_NarrowBin16to8x16;
2305 case Iop_QNarrowBin32Sto16Ux8:
2306 case Iop_QNarrowBin32Sto16Sx8:
2307 case Iop_QNarrowBin32Uto16Ux8:
2308 return Iop_NarrowBin32to16x8;
2309 /* Binary: (64, 64) -> 64 */
2310 case Iop_QNarrowBin32Sto16Sx4:
2311 return Iop_NarrowBin32to16x4;
2312 case Iop_QNarrowBin16Sto8Ux8:
2313 case Iop_QNarrowBin16Sto8Sx8:
2314 return Iop_NarrowBin16to8x8;
2315 /* Unary: 128 -> 64 */
2316 case Iop_QNarrowUn64Uto32Ux2:
2317 case Iop_QNarrowUn64Sto32Sx2:
2318 case Iop_QNarrowUn64Sto32Ux2:
2319 return Iop_NarrowUn64to32x2;
2320 case Iop_QNarrowUn32Uto16Ux4:
2321 case Iop_QNarrowUn32Sto16Sx4:
2322 case Iop_QNarrowUn32Sto16Ux4:
2323 return Iop_NarrowUn32to16x4;
2324 case Iop_QNarrowUn16Uto8Ux8:
2325 case Iop_QNarrowUn16Sto8Sx8:
2326 case Iop_QNarrowUn16Sto8Ux8:
2327 return Iop_NarrowUn16to8x8;
2328 default:
2329 ppIROp(qnarrowOp);
2330 VG_(tool_panic)("vanillaNarrowOpOfShape");
2331 }
2332}
2333
2334static
sewardj7ee7d852011-06-16 11:37:21 +00002335IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2336 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002337{
2338 IRAtom *at1, *at2, *at3;
2339 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2340 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002341 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2342 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2343 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2344 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2345 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2346 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2347 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002348 }
sewardjb5a29232011-10-22 09:29:41 +00002349 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002350 tl_assert(isShadowAtom(mce,vatom1));
2351 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002352 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2353 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002354 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002355 return at3;
2356}
2357
sewardjacd2e912005-01-13 19:17:06 +00002358static
sewardj7ee7d852011-06-16 11:37:21 +00002359IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2360 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002361{
2362 IRAtom *at1, *at2, *at3;
2363 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2364 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002365 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2366 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2367 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2368 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002369 }
sewardjb5a29232011-10-22 09:29:41 +00002370 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002371 tl_assert(isShadowAtom(mce,vatom1));
2372 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002373 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2374 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002375 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002376 return at3;
2377}
2378
sewardj57f92b02010-08-22 11:54:14 +00002379static
sewardjb5a29232011-10-22 09:29:41 +00002380IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002381 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002382{
2383 IRAtom *at1, *at2;
2384 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002385 tl_assert(isShadowAtom(mce,vatom1));
2386 /* For vanilla narrowing (non-saturating), we can just apply
2387 the op directly to the V bits. */
2388 switch (narrow_op) {
2389 case Iop_NarrowUn16to8x8:
2390 case Iop_NarrowUn32to16x4:
2391 case Iop_NarrowUn64to32x2:
2392 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2393 return at1;
2394 default:
2395 break; /* Do Plan B */
2396 }
2397 /* Plan B: for ops that involve a saturation operation on the args,
2398 we must PCast before the vanilla narrow. */
2399 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002400 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2401 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2402 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2403 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2404 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2405 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2406 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2407 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2408 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2409 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002410 }
sewardjb5a29232011-10-22 09:29:41 +00002411 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002412 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002413 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002414 return at2;
2415}
2416
2417static
sewardj7ee7d852011-06-16 11:37:21 +00002418IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2419 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002420{
2421 IRAtom *at1, *at2;
2422 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2423 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002424 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2425 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2426 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2427 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2428 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2429 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2430 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002431 }
2432 tl_assert(isShadowAtom(mce,vatom1));
2433 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2434 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2435 return at2;
2436}
2437
sewardja1d93302004-12-12 16:45:06 +00002438
2439/* --- --- Vector integer arithmetic --- --- */
2440
2441/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002442
sewardja2f30952013-03-27 11:40:02 +00002443/* --- V256-bit versions --- */
2444
2445static
2446IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2447{
2448 IRAtom* at;
2449 at = mkUifUV256(mce, vatom1, vatom2);
2450 at = mkPCast8x32(mce, at);
2451 return at;
2452}
2453
2454static
2455IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2456{
2457 IRAtom* at;
2458 at = mkUifUV256(mce, vatom1, vatom2);
2459 at = mkPCast16x16(mce, at);
2460 return at;
2461}
2462
2463static
2464IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2465{
2466 IRAtom* at;
2467 at = mkUifUV256(mce, vatom1, vatom2);
2468 at = mkPCast32x8(mce, at);
2469 return at;
2470}
2471
2472static
2473IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2474{
2475 IRAtom* at;
2476 at = mkUifUV256(mce, vatom1, vatom2);
2477 at = mkPCast64x4(mce, at);
2478 return at;
2479}
2480
sewardj20d38f22005-02-07 23:50:18 +00002481/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002482
sewardja1d93302004-12-12 16:45:06 +00002483static
2484IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2485{
2486 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002487 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002488 at = mkPCast8x16(mce, at);
2489 return at;
2490}
2491
2492static
2493IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2494{
2495 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002496 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002497 at = mkPCast16x8(mce, at);
2498 return at;
2499}
2500
2501static
2502IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2503{
2504 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002505 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002506 at = mkPCast32x4(mce, at);
2507 return at;
2508}
2509
2510static
2511IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2512{
2513 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002514 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002515 at = mkPCast64x2(mce, at);
2516 return at;
2517}
sewardj3245c912004-12-10 14:58:26 +00002518
sewardjacd2e912005-01-13 19:17:06 +00002519/* --- 64-bit versions --- */
2520
2521static
2522IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2523{
2524 IRAtom* at;
2525 at = mkUifU64(mce, vatom1, vatom2);
2526 at = mkPCast8x8(mce, at);
2527 return at;
2528}
2529
2530static
2531IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2532{
2533 IRAtom* at;
2534 at = mkUifU64(mce, vatom1, vatom2);
2535 at = mkPCast16x4(mce, at);
2536 return at;
2537}
2538
2539static
2540IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2541{
2542 IRAtom* at;
2543 at = mkUifU64(mce, vatom1, vatom2);
2544 at = mkPCast32x2(mce, at);
2545 return at;
2546}
2547
sewardj57f92b02010-08-22 11:54:14 +00002548static
2549IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2550{
2551 IRAtom* at;
2552 at = mkUifU64(mce, vatom1, vatom2);
2553 at = mkPCastTo(mce, Ity_I64, at);
2554 return at;
2555}
2556
sewardjc678b852010-09-22 00:58:51 +00002557/* --- 32-bit versions --- */
2558
2559static
2560IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2561{
2562 IRAtom* at;
2563 at = mkUifU32(mce, vatom1, vatom2);
2564 at = mkPCast8x4(mce, at);
2565 return at;
2566}
2567
2568static
2569IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2570{
2571 IRAtom* at;
2572 at = mkUifU32(mce, vatom1, vatom2);
2573 at = mkPCast16x2(mce, at);
2574 return at;
2575}
2576
sewardj3245c912004-12-10 14:58:26 +00002577
2578/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002579/*--- Generate shadow values from all kinds of IRExprs. ---*/
2580/*------------------------------------------------------------*/
2581
/* Build the shadow (V-bit) expression for a quaternary (4-arg)
   primop applied to atom1..atom4.  Panics on any op it does not
   know how to instrument. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Shadow values for all four args; note these calls emit IR, so
      their order matters. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         /* Data-steering only moves bits around, so the op itself can
            be applied directly to the shadow args. */
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2628
2629
/* Build the shadow (V-bit) expression for a ternary (3-arg) primop
   applied to atom1..atom3.  For the FP/DFP cases, the "I32(rm)"
   comments mark the rounding-mode argument; all three shadow args
   are nonetheless folded in lazily via mkLazy3.  Panics on any op
   it does not know how to instrument. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Shadow values for all three args; note these calls emit IR, so
      their order matters. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 (shadow is 32 bits wide) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction amount (atom3) must itself be defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* The lane index (atom2) must itself be defined. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2713
2714
2715static
sewardj95448072004-11-22 20:19:51 +00002716IRAtom* expr2vbits_Binop ( MCEnv* mce,
2717 IROp op,
2718 IRAtom* atom1, IRAtom* atom2 )
2719{
2720 IRType and_or_ty;
2721 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2722 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2723 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2724
2725 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2726 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2727
2728 tl_assert(isOriginalAtom(mce,atom1));
2729 tl_assert(isOriginalAtom(mce,atom2));
2730 tl_assert(isShadowAtom(mce,vatom1));
2731 tl_assert(isShadowAtom(mce,vatom2));
2732 tl_assert(sameKindedAtoms(atom1,vatom1));
2733 tl_assert(sameKindedAtoms(atom2,vatom2));
2734 switch (op) {
2735
sewardjc678b852010-09-22 00:58:51 +00002736 /* 32-bit SIMD */
2737
2738 case Iop_Add16x2:
2739 case Iop_HAdd16Ux2:
2740 case Iop_HAdd16Sx2:
2741 case Iop_Sub16x2:
2742 case Iop_HSub16Ux2:
2743 case Iop_HSub16Sx2:
2744 case Iop_QAdd16Sx2:
2745 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002746 case Iop_QSub16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002747 return binary16Ix2(mce, vatom1, vatom2);
2748
2749 case Iop_Add8x4:
2750 case Iop_HAdd8Ux4:
2751 case Iop_HAdd8Sx4:
2752 case Iop_Sub8x4:
2753 case Iop_HSub8Ux4:
2754 case Iop_HSub8Sx4:
2755 case Iop_QSub8Ux4:
2756 case Iop_QAdd8Ux4:
2757 case Iop_QSub8Sx4:
2758 case Iop_QAdd8Sx4:
2759 return binary8Ix4(mce, vatom1, vatom2);
2760
sewardjacd2e912005-01-13 19:17:06 +00002761 /* 64-bit SIMD */
2762
sewardj57f92b02010-08-22 11:54:14 +00002763 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002764 case Iop_ShrN16x4:
2765 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002766 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002767 case Iop_SarN16x4:
2768 case Iop_SarN32x2:
2769 case Iop_ShlN16x4:
2770 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002771 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002772 /* Same scheme as with all other shifts. */
sewardjb9e6d242013-05-11 13:42:08 +00002773 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002774 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002775
sewardj7ee7d852011-06-16 11:37:21 +00002776 case Iop_QNarrowBin32Sto16Sx4:
2777 case Iop_QNarrowBin16Sto8Sx8:
2778 case Iop_QNarrowBin16Sto8Ux8:
2779 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002780
2781 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002782 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002783 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002784 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002785 case Iop_Avg8Ux8:
2786 case Iop_QSub8Sx8:
2787 case Iop_QSub8Ux8:
2788 case Iop_Sub8x8:
2789 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002790 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002791 case Iop_CmpEQ8x8:
2792 case Iop_QAdd8Sx8:
2793 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002794 case Iop_QSal8x8:
2795 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002796 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002797 case Iop_Mul8x8:
2798 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002799 return binary8Ix8(mce, vatom1, vatom2);
2800
2801 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002802 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002803 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002804 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002805 case Iop_Avg16Ux4:
2806 case Iop_QSub16Ux4:
2807 case Iop_QSub16Sx4:
2808 case Iop_Sub16x4:
2809 case Iop_Mul16x4:
2810 case Iop_MulHi16Sx4:
2811 case Iop_MulHi16Ux4:
2812 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002813 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002814 case Iop_CmpEQ16x4:
2815 case Iop_QAdd16Sx4:
2816 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002817 case Iop_QSal16x4:
2818 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002819 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002820 case Iop_QDMulHi16Sx4:
2821 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002822 return binary16Ix4(mce, vatom1, vatom2);
2823
2824 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002825 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002826 case Iop_Max32Sx2:
2827 case Iop_Max32Ux2:
2828 case Iop_Min32Sx2:
2829 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002830 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002831 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002832 case Iop_CmpEQ32x2:
2833 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002834 case Iop_QAdd32Ux2:
2835 case Iop_QAdd32Sx2:
2836 case Iop_QSub32Ux2:
2837 case Iop_QSub32Sx2:
2838 case Iop_QSal32x2:
2839 case Iop_QShl32x2:
2840 case Iop_QDMulHi32Sx2:
2841 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002842 return binary32Ix2(mce, vatom1, vatom2);
2843
sewardj57f92b02010-08-22 11:54:14 +00002844 case Iop_QSub64Ux1:
2845 case Iop_QSub64Sx1:
2846 case Iop_QAdd64Ux1:
2847 case Iop_QAdd64Sx1:
2848 case Iop_QSal64x1:
2849 case Iop_QShl64x1:
2850 case Iop_Sal64x1:
2851 return binary64Ix1(mce, vatom1, vatom2);
2852
2853 case Iop_QShlN8Sx8:
2854 case Iop_QShlN8x8:
2855 case Iop_QSalN8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002856 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002857 return mkPCast8x8(mce, vatom1);
2858
2859 case Iop_QShlN16Sx4:
2860 case Iop_QShlN16x4:
2861 case Iop_QSalN16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002862 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002863 return mkPCast16x4(mce, vatom1);
2864
2865 case Iop_QShlN32Sx2:
2866 case Iop_QShlN32x2:
2867 case Iop_QSalN32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002868 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002869 return mkPCast32x2(mce, vatom1);
2870
2871 case Iop_QShlN64Sx1:
2872 case Iop_QShlN64x1:
2873 case Iop_QSalN64x1:
sewardjb9e6d242013-05-11 13:42:08 +00002874 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002875 return mkPCast32x2(mce, vatom1);
2876
2877 case Iop_PwMax32Sx2:
2878 case Iop_PwMax32Ux2:
2879 case Iop_PwMin32Sx2:
2880 case Iop_PwMin32Ux2:
2881 case Iop_PwMax32Fx2:
2882 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002883 return assignNew('V', mce, Ity_I64,
2884 binop(Iop_PwMax32Ux2,
2885 mkPCast32x2(mce, vatom1),
2886 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002887
2888 case Iop_PwMax16Sx4:
2889 case Iop_PwMax16Ux4:
2890 case Iop_PwMin16Sx4:
2891 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00002892 return assignNew('V', mce, Ity_I64,
2893 binop(Iop_PwMax16Ux4,
2894 mkPCast16x4(mce, vatom1),
2895 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002896
2897 case Iop_PwMax8Sx8:
2898 case Iop_PwMax8Ux8:
2899 case Iop_PwMin8Sx8:
2900 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00002901 return assignNew('V', mce, Ity_I64,
2902 binop(Iop_PwMax8Ux8,
2903 mkPCast8x8(mce, vatom1),
2904 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002905
2906 case Iop_PwAdd32x2:
2907 case Iop_PwAdd32Fx2:
2908 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00002909 assignNew('V', mce, Ity_I64,
2910 binop(Iop_PwAdd32x2,
2911 mkPCast32x2(mce, vatom1),
2912 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002913
2914 case Iop_PwAdd16x4:
2915 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00002916 assignNew('V', mce, Ity_I64,
2917 binop(op, mkPCast16x4(mce, vatom1),
2918 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002919
2920 case Iop_PwAdd8x8:
2921 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00002922 assignNew('V', mce, Ity_I64,
2923 binop(op, mkPCast8x8(mce, vatom1),
2924 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002925
2926 case Iop_Shl8x8:
2927 case Iop_Shr8x8:
2928 case Iop_Sar8x8:
2929 case Iop_Sal8x8:
2930 return mkUifU64(mce,
2931 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2932 mkPCast8x8(mce,vatom2)
2933 );
2934
2935 case Iop_Shl16x4:
2936 case Iop_Shr16x4:
2937 case Iop_Sar16x4:
2938 case Iop_Sal16x4:
2939 return mkUifU64(mce,
2940 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2941 mkPCast16x4(mce,vatom2)
2942 );
2943
2944 case Iop_Shl32x2:
2945 case Iop_Shr32x2:
2946 case Iop_Sar32x2:
2947 case Iop_Sal32x2:
2948 return mkUifU64(mce,
2949 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2950 mkPCast32x2(mce,vatom2)
2951 );
2952
sewardjacd2e912005-01-13 19:17:06 +00002953 /* 64-bit data-steering */
2954 case Iop_InterleaveLO32x2:
2955 case Iop_InterleaveLO16x4:
2956 case Iop_InterleaveLO8x8:
2957 case Iop_InterleaveHI32x2:
2958 case Iop_InterleaveHI16x4:
2959 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002960 case Iop_CatOddLanes8x8:
2961 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002962 case Iop_CatOddLanes16x4:
2963 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002964 case Iop_InterleaveOddLanes8x8:
2965 case Iop_InterleaveEvenLanes8x8:
2966 case Iop_InterleaveOddLanes16x4:
2967 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002968 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002969
sewardj57f92b02010-08-22 11:54:14 +00002970 case Iop_GetElem8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002971 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002972 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2973 case Iop_GetElem16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002974 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002975 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2976 case Iop_GetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002977 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002978 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2979
sewardj114a9172008-02-09 01:49:32 +00002980 /* Perm8x8: rearrange values in left arg using steering values
2981 from right arg. So rearrange the vbits in the same way but
2982 pessimise wrt steering values. */
2983 case Iop_Perm8x8:
2984 return mkUifU64(
2985 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002986 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002987 mkPCast8x8(mce, vatom2)
2988 );
2989
sewardj20d38f22005-02-07 23:50:18 +00002990 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002991
sewardj57f92b02010-08-22 11:54:14 +00002992 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002993 case Iop_ShrN16x8:
2994 case Iop_ShrN32x4:
2995 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002996 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002997 case Iop_SarN16x8:
2998 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002999 case Iop_SarN64x2:
3000 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00003001 case Iop_ShlN16x8:
3002 case Iop_ShlN32x4:
3003 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00003004 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3005 this is wrong now, scalar shifts are done properly lazily.
3006 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003007 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003008 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00003009
sewardjcbf8be72005-11-10 18:34:41 +00003010 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00003011 case Iop_Shl8x16:
3012 case Iop_Shr8x16:
3013 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00003014 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00003015 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00003016 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003017 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003018 mkPCast8x16(mce,vatom2)
3019 );
3020
3021 case Iop_Shl16x8:
3022 case Iop_Shr16x8:
3023 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00003024 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00003025 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00003026 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003027 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003028 mkPCast16x8(mce,vatom2)
3029 );
3030
3031 case Iop_Shl32x4:
3032 case Iop_Shr32x4:
3033 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00003034 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00003035 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00003036 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003037 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003038 mkPCast32x4(mce,vatom2)
3039 );
3040
sewardj57f92b02010-08-22 11:54:14 +00003041 case Iop_Shl64x2:
3042 case Iop_Shr64x2:
3043 case Iop_Sar64x2:
3044 case Iop_Sal64x2:
3045 return mkUifUV128(mce,
3046 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3047 mkPCast64x2(mce,vatom2)
3048 );
3049
3050 case Iop_F32ToFixed32Ux4_RZ:
3051 case Iop_F32ToFixed32Sx4_RZ:
3052 case Iop_Fixed32UToF32x4_RN:
3053 case Iop_Fixed32SToF32x4_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003054 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003055 return mkPCast32x4(mce, vatom1);
3056
3057 case Iop_F32ToFixed32Ux2_RZ:
3058 case Iop_F32ToFixed32Sx2_RZ:
3059 case Iop_Fixed32UToF32x2_RN:
3060 case Iop_Fixed32SToF32x2_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003061 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003062 return mkPCast32x2(mce, vatom1);
3063
sewardja1d93302004-12-12 16:45:06 +00003064 case Iop_QSub8Ux16:
3065 case Iop_QSub8Sx16:
3066 case Iop_Sub8x16:
3067 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003068 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003069 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003070 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003071 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00003072 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00003073 case Iop_CmpEQ8x16:
3074 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003075 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003076 case Iop_QAdd8Ux16:
3077 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00003078 case Iop_QSal8x16:
3079 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00003080 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00003081 case Iop_Mul8x16:
3082 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00003083 return binary8Ix16(mce, vatom1, vatom2);
3084
3085 case Iop_QSub16Ux8:
3086 case Iop_QSub16Sx8:
3087 case Iop_Sub16x8:
3088 case Iop_Mul16x8:
3089 case Iop_MulHi16Sx8:
3090 case Iop_MulHi16Ux8:
3091 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003092 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003093 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003094 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003095 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003096 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003097 case Iop_CmpEQ16x8:
3098 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003099 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003100 case Iop_QAdd16Ux8:
3101 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00003102 case Iop_QSal16x8:
3103 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00003104 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00003105 case Iop_QDMulHi16Sx8:
3106 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003107 return binary16Ix8(mce, vatom1, vatom2);
3108
3109 case Iop_Sub32x4:
3110 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00003111 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00003112 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00003113 case Iop_QAdd32Sx4:
3114 case Iop_QAdd32Ux4:
3115 case Iop_QSub32Sx4:
3116 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00003117 case Iop_QSal32x4:
3118 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00003119 case Iop_Avg32Ux4:
3120 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003121 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00003122 case Iop_Max32Ux4:
3123 case Iop_Max32Sx4:
3124 case Iop_Min32Ux4:
3125 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003126 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003127 case Iop_QDMulHi32Sx4:
3128 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003129 return binary32Ix4(mce, vatom1, vatom2);
3130
3131 case Iop_Sub64x2:
3132 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00003133 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003134 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00003135 case Iop_QSal64x2:
3136 case Iop_QShl64x2:
3137 case Iop_QAdd64Ux2:
3138 case Iop_QAdd64Sx2:
3139 case Iop_QSub64Ux2:
3140 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00003141 return binary64Ix2(mce, vatom1, vatom2);
3142
sewardj7ee7d852011-06-16 11:37:21 +00003143 case Iop_QNarrowBin32Sto16Sx8:
3144 case Iop_QNarrowBin32Uto16Ux8:
3145 case Iop_QNarrowBin32Sto16Ux8:
3146 case Iop_QNarrowBin16Sto8Sx16:
3147 case Iop_QNarrowBin16Uto8Ux16:
3148 case Iop_QNarrowBin16Sto8Ux16:
3149 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003150
sewardj0b070592004-12-10 21:44:22 +00003151 case Iop_Sub64Fx2:
3152 case Iop_Mul64Fx2:
3153 case Iop_Min64Fx2:
3154 case Iop_Max64Fx2:
3155 case Iop_Div64Fx2:
3156 case Iop_CmpLT64Fx2:
3157 case Iop_CmpLE64Fx2:
3158 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003159 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003160 case Iop_Add64Fx2:
3161 return binary64Fx2(mce, vatom1, vatom2);
3162
3163 case Iop_Sub64F0x2:
3164 case Iop_Mul64F0x2:
3165 case Iop_Min64F0x2:
3166 case Iop_Max64F0x2:
3167 case Iop_Div64F0x2:
3168 case Iop_CmpLT64F0x2:
3169 case Iop_CmpLE64F0x2:
3170 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003171 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003172 case Iop_Add64F0x2:
3173 return binary64F0x2(mce, vatom1, vatom2);
3174
sewardj170ee212004-12-10 18:57:51 +00003175 case Iop_Sub32Fx4:
3176 case Iop_Mul32Fx4:
3177 case Iop_Min32Fx4:
3178 case Iop_Max32Fx4:
3179 case Iop_Div32Fx4:
3180 case Iop_CmpLT32Fx4:
3181 case Iop_CmpLE32Fx4:
3182 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003183 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003184 case Iop_CmpGT32Fx4:
3185 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003186 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00003187 case Iop_Recps32Fx4:
3188 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003189 return binary32Fx4(mce, vatom1, vatom2);
3190
sewardj57f92b02010-08-22 11:54:14 +00003191 case Iop_Sub32Fx2:
3192 case Iop_Mul32Fx2:
3193 case Iop_Min32Fx2:
3194 case Iop_Max32Fx2:
3195 case Iop_CmpEQ32Fx2:
3196 case Iop_CmpGT32Fx2:
3197 case Iop_CmpGE32Fx2:
3198 case Iop_Add32Fx2:
3199 case Iop_Recps32Fx2:
3200 case Iop_Rsqrts32Fx2:
3201 return binary32Fx2(mce, vatom1, vatom2);
3202
sewardj170ee212004-12-10 18:57:51 +00003203 case Iop_Sub32F0x4:
3204 case Iop_Mul32F0x4:
3205 case Iop_Min32F0x4:
3206 case Iop_Max32F0x4:
3207 case Iop_Div32F0x4:
3208 case Iop_CmpLT32F0x4:
3209 case Iop_CmpLE32F0x4:
3210 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003211 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003212 case Iop_Add32F0x4:
3213 return binary32F0x4(mce, vatom1, vatom2);
3214
sewardj57f92b02010-08-22 11:54:14 +00003215 case Iop_QShlN8Sx16:
3216 case Iop_QShlN8x16:
3217 case Iop_QSalN8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003218 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003219 return mkPCast8x16(mce, vatom1);
3220
3221 case Iop_QShlN16Sx8:
3222 case Iop_QShlN16x8:
3223 case Iop_QSalN16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003224 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003225 return mkPCast16x8(mce, vatom1);
3226
3227 case Iop_QShlN32Sx4:
3228 case Iop_QShlN32x4:
3229 case Iop_QSalN32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003230 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003231 return mkPCast32x4(mce, vatom1);
3232
3233 case Iop_QShlN64Sx2:
3234 case Iop_QShlN64x2:
3235 case Iop_QSalN64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003236 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003237 return mkPCast32x4(mce, vatom1);
3238
3239 case Iop_Mull32Sx2:
3240 case Iop_Mull32Ux2:
3241 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003242 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3243 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003244
3245 case Iop_Mull16Sx4:
3246 case Iop_Mull16Ux4:
3247 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003248 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3249 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003250
3251 case Iop_Mull8Sx8:
3252 case Iop_Mull8Ux8:
3253 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003254 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3255 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003256
3257 case Iop_PwAdd32x4:
3258 return mkPCast32x4(mce,
3259 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3260 mkPCast32x4(mce, vatom2))));
3261
3262 case Iop_PwAdd16x8:
3263 return mkPCast16x8(mce,
3264 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3265 mkPCast16x8(mce, vatom2))));
3266
3267 case Iop_PwAdd8x16:
3268 return mkPCast8x16(mce,
3269 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3270 mkPCast8x16(mce, vatom2))));
3271
sewardj20d38f22005-02-07 23:50:18 +00003272 /* V128-bit data-steering */
3273 case Iop_SetV128lo32:
3274 case Iop_SetV128lo64:
3275 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003276 case Iop_InterleaveLO64x2:
3277 case Iop_InterleaveLO32x4:
3278 case Iop_InterleaveLO16x8:
3279 case Iop_InterleaveLO8x16:
3280 case Iop_InterleaveHI64x2:
3281 case Iop_InterleaveHI32x4:
3282 case Iop_InterleaveHI16x8:
3283 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003284 case Iop_CatOddLanes8x16:
3285 case Iop_CatOddLanes16x8:
3286 case Iop_CatOddLanes32x4:
3287 case Iop_CatEvenLanes8x16:
3288 case Iop_CatEvenLanes16x8:
3289 case Iop_CatEvenLanes32x4:
3290 case Iop_InterleaveOddLanes8x16:
3291 case Iop_InterleaveOddLanes16x8:
3292 case Iop_InterleaveOddLanes32x4:
3293 case Iop_InterleaveEvenLanes8x16:
3294 case Iop_InterleaveEvenLanes16x8:
3295 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003296 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003297
3298 case Iop_GetElem8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003299 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003300 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3301 case Iop_GetElem16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003302 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003303 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3304 case Iop_GetElem32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003305 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003306 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3307 case Iop_GetElem64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003308 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003309 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3310
sewardj620eb5b2005-10-22 12:50:43 +00003311 /* Perm8x16: rearrange values in left arg using steering values
3312 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003313 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003314 case Iop_Perm8x16:
3315 return mkUifUV128(
3316 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003317 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003318 mkPCast8x16(mce, vatom2)
3319 );
sewardj350e8f72012-06-25 07:52:15 +00003320 case Iop_Perm32x4:
3321 return mkUifUV128(
3322 mce,
3323 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3324 mkPCast32x4(mce, vatom2)
3325 );
sewardj170ee212004-12-10 18:57:51 +00003326
sewardj43d60752005-11-10 18:13:01 +00003327 /* These two take the lower half of each 16-bit lane, sign/zero
3328 extend it to 32, and multiply together, producing a 32x4
3329 result (and implicitly ignoring half the operand bits). So
3330 treat it as a bunch of independent 16x8 operations, but then
3331 do 32-bit shifts left-right to copy the lower half results
3332 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3333 into the upper half of each result lane. */
3334 case Iop_MullEven16Ux8:
3335 case Iop_MullEven16Sx8: {
3336 IRAtom* at;
3337 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003338 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3339 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003340 return at;
3341 }
3342
3343 /* Same deal as Iop_MullEven16{S,U}x8 */
3344 case Iop_MullEven8Ux16:
3345 case Iop_MullEven8Sx16: {
3346 IRAtom* at;
3347 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003348 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3349 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003350 return at;
3351 }
3352
3353 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3354 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3355 Simply apply same op to the V bits, since this really no more
3356 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003357 case Iop_NarrowBin32to16x8:
3358 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003359 return assignNew('V', mce, Ity_V128,
3360 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003361
3362 case Iop_ShrV128:
3363 case Iop_ShlV128:
3364 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3365 this is wrong now, scalar shifts are done properly lazily.
3366 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003367 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003368 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003369
sewardj69a13322005-04-23 01:14:51 +00003370 /* I128-bit data-steering */
3371 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003372 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003373
sewardj350e8f72012-06-25 07:52:15 +00003374 /* V256-bit SIMD */
3375
3376 case Iop_Add64Fx4:
3377 case Iop_Sub64Fx4:
3378 case Iop_Mul64Fx4:
3379 case Iop_Div64Fx4:
3380 case Iop_Max64Fx4:
3381 case Iop_Min64Fx4:
3382 return binary64Fx4(mce, vatom1, vatom2);
3383
3384 case Iop_Add32Fx8:
3385 case Iop_Sub32Fx8:
3386 case Iop_Mul32Fx8:
3387 case Iop_Div32Fx8:
3388 case Iop_Max32Fx8:
3389 case Iop_Min32Fx8:
3390 return binary32Fx8(mce, vatom1, vatom2);
3391
3392 /* V256-bit data-steering */
3393 case Iop_V128HLtoV256:
3394 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3395
sewardj3245c912004-12-10 14:58:26 +00003396 /* Scalar floating point */
3397
sewardjb5b87402011-03-07 16:05:35 +00003398 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003399 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003400 /* I32(rm) x F32 -> I64 */
3401 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3402
3403 case Iop_I64StoF32:
3404 /* I32(rm) x I64 -> F32 */
3405 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3406
sewardjed69fdb2006-02-03 16:12:27 +00003407 case Iop_RoundF64toInt:
3408 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003409 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003410 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003411 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003412 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003413 case Iop_SinF64:
3414 case Iop_CosF64:
3415 case Iop_TanF64:
3416 case Iop_2xm1F64:
3417 case Iop_SqrtF64:
3418 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003419 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3420
sewardjea8b02f2012-04-12 17:28:57 +00003421 case Iop_ShlD64:
3422 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003423 case Iop_RoundD64toInt:
florian054684f2013-06-06 21:21:46 +00003424 /* I32(rm) x D64 -> D64 */
sewardjea8b02f2012-04-12 17:28:57 +00003425 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3426
3427 case Iop_ShlD128:
3428 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003429 case Iop_RoundD128toInt:
florian054684f2013-06-06 21:21:46 +00003430 /* I32(rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003431 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3432
3433 case Iop_D64toI64S:
florian53eb2a02013-01-12 22:04:00 +00003434 case Iop_D64toI64U:
sewardjea8b02f2012-04-12 17:28:57 +00003435 case Iop_I64StoD64:
florian53eb2a02013-01-12 22:04:00 +00003436 case Iop_I64UtoD64:
florian054684f2013-06-06 21:21:46 +00003437 /* I32(rm) x I64/D64 -> D64/I64 */
sewardjea8b02f2012-04-12 17:28:57 +00003438 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3439
florianba5693c2013-06-17 19:04:24 +00003440 case Iop_F32toD32:
3441 case Iop_F64toD32:
3442 case Iop_F128toD32:
3443 case Iop_D32toF32:
3444 case Iop_D64toF32:
3445 case Iop_D128toF32:
3446 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3447 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3448
3449 case Iop_F32toD64:
florian39b08d82013-05-05 15:05:42 +00003450 case Iop_F64toD64:
florianba5693c2013-06-17 19:04:24 +00003451 case Iop_F128toD64:
3452 case Iop_D32toF64:
florian39b08d82013-05-05 15:05:42 +00003453 case Iop_D64toF64:
florian39b08d82013-05-05 15:05:42 +00003454 case Iop_D128toF64:
florianba5693c2013-06-17 19:04:24 +00003455 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
florian39b08d82013-05-05 15:05:42 +00003456 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3457
florianba5693c2013-06-17 19:04:24 +00003458 case Iop_F32toD128:
3459 case Iop_F64toD128:
florian39b08d82013-05-05 15:05:42 +00003460 case Iop_F128toD128:
florianba5693c2013-06-17 19:04:24 +00003461 case Iop_D32toF128:
3462 case Iop_D64toF128:
florian39b08d82013-05-05 15:05:42 +00003463 case Iop_D128toF128:
florianba5693c2013-06-17 19:04:24 +00003464 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
florian39b08d82013-05-05 15:05:42 +00003465 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3466
sewardjd376a762010-06-27 09:08:54 +00003467 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003468 case Iop_SqrtF32:
3469 /* I32(rm) x I32/F32 -> I32/F32 */
3470 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3471
sewardjb5b87402011-03-07 16:05:35 +00003472 case Iop_SqrtF128:
3473 /* I32(rm) x F128 -> F128 */
3474 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3475
3476 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003477 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003478 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003479 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003480 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3481 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3482
3483 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003484 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003485 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
florian733b4db2013-06-06 19:13:29 +00003486 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3487 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003488 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3489
3490 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003491 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003492 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
florian733b4db2013-06-06 19:13:29 +00003493 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3494 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3495 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003496 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3497
3498 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003499 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003500 return assignNew('V', mce, Ity_I128,
3501 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003502
sewardj59570ff2010-01-01 11:59:33 +00003503 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003504 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003505 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003506 case Iop_I64UtoF32:
florian53eb2a02013-01-12 22:04:00 +00003507 case Iop_D64toI32U:
3508 case Iop_D64toI32S:
3509 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
sewardj95448072004-11-22 20:19:51 +00003510 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3511
sewardjea8b02f2012-04-12 17:28:57 +00003512 case Iop_D64toD32:
florian054684f2013-06-06 21:21:46 +00003513 /* First arg is I32 (rounding mode), second is D64 (data). */
florianf4bed372012-12-21 04:25:10 +00003514 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
sewardjea8b02f2012-04-12 17:28:57 +00003515
sewardj06f96d02009-12-31 19:24:12 +00003516 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003517 /* First arg is I32 (rounding mode), second is F64 (data). */
3518 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3519
sewardj18c72fa2012-04-23 11:22:05 +00003520 case Iop_InsertExpD64:
3521 /* I64 x I64 -> D64 */
3522 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3523
3524 case Iop_InsertExpD128:
3525 /* I64 x I128 -> D128 */
3526 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3527
sewardjb5b87402011-03-07 16:05:35 +00003528 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003529 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003530 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003531 case Iop_CmpD64:
3532 case Iop_CmpD128:
florian29a36b92012-12-26 17:48:46 +00003533 case Iop_CmpExpD64:
3534 case Iop_CmpExpD128:
sewardj95448072004-11-22 20:19:51 +00003535 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3536
3537 /* non-FP after here */
3538
3539 case Iop_DivModU64to32:
3540 case Iop_DivModS64to32:
3541 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3542
sewardj69a13322005-04-23 01:14:51 +00003543 case Iop_DivModU128to64:
3544 case Iop_DivModS128to64:
3545 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3546
florian537ed2d2012-08-20 16:51:39 +00003547 case Iop_8HLto16:
3548 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003549 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003550 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003551 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003552 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003553
sewardjb5b87402011-03-07 16:05:35 +00003554 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003555 case Iop_MullS64:
3556 case Iop_MullU64: {
3557 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3558 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003559 return assignNew('V', mce, Ity_I128,
3560 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003561 }
3562
sewardj95448072004-11-22 20:19:51 +00003563 case Iop_MullS32:
3564 case Iop_MullU32: {
3565 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3566 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003567 return assignNew('V', mce, Ity_I64,
3568 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003569 }
3570
3571 case Iop_MullS16:
3572 case Iop_MullU16: {
3573 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3574 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003575 return assignNew('V', mce, Ity_I32,
3576 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003577 }
3578
3579 case Iop_MullS8:
3580 case Iop_MullU8: {
3581 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3582 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003583 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003584 }
3585
sewardj5af05062010-10-18 16:31:14 +00003586 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003587 case Iop_DivS32:
3588 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003589 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003590 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003591 case Iop_QAdd32S: /* could probably do better */
3592 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003593 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3594
sewardjb00944a2005-12-23 12:47:16 +00003595 case Iop_DivS64:
3596 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003597 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003598 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003599 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3600
sewardj95448072004-11-22 20:19:51 +00003601 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003602 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003603 return expensiveAddSub(mce,True,Ity_I32,
3604 vatom1,vatom2, atom1,atom2);
3605 else
3606 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003607 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003608 if (mce->bogusLiterals)
3609 return expensiveAddSub(mce,False,Ity_I32,
3610 vatom1,vatom2, atom1,atom2);
3611 else
3612 goto cheap_AddSub32;
3613
3614 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003615 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003616 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3617
sewardj463b3d92005-07-18 11:41:15 +00003618 case Iop_CmpORD32S:
3619 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003620 case Iop_CmpORD64S:
3621 case Iop_CmpORD64U:
3622 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003623
sewardj681be302005-01-15 20:43:58 +00003624 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003625 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003626 return expensiveAddSub(mce,True,Ity_I64,
3627 vatom1,vatom2, atom1,atom2);
3628 else
3629 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003630 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003631 if (mce->bogusLiterals)
3632 return expensiveAddSub(mce,False,Ity_I64,
3633 vatom1,vatom2, atom1,atom2);
3634 else
3635 goto cheap_AddSub64;
3636
3637 cheap_AddSub64:
3638 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003639 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3640
sewardj95448072004-11-22 20:19:51 +00003641 case Iop_Mul16:
3642 case Iop_Add16:
3643 case Iop_Sub16:
3644 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3645
florian537ed2d2012-08-20 16:51:39 +00003646 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003647 case Iop_Sub8:
3648 case Iop_Add8:
3649 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3650
sewardj69a13322005-04-23 01:14:51 +00003651 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003652 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003653 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003654 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003655 else
3656 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003657
3658 expensive_cmp64:
3659 case Iop_ExpCmpNE64:
3660 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3661
sewardj69a13322005-04-23 01:14:51 +00003662 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003663 case Iop_CmpLE64S: case Iop_CmpLE64U:
3664 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003665 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3666
sewardjd5204dc2004-12-31 01:16:11 +00003667 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003668 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003669 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003670 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003671 else
3672 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003673
3674 expensive_cmp32:
3675 case Iop_ExpCmpNE32:
3676 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3677
sewardjd5204dc2004-12-31 01:16:11 +00003678 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003679 case Iop_CmpLE32S: case Iop_CmpLE32U:
3680 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003681 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3682
3683 case Iop_CmpEQ16: case Iop_CmpNE16:
3684 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3685
sewardj4cfa81b2012-11-08 10:58:16 +00003686 case Iop_ExpCmpNE16:
3687 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3688
sewardj95448072004-11-22 20:19:51 +00003689 case Iop_CmpEQ8: case Iop_CmpNE8:
3690 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3691
sewardjafed4c52009-07-12 13:00:17 +00003692 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3693 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3694 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3695 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3696 /* Just say these all produce a defined result, regardless
3697 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3698 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3699
sewardjaaddbc22005-10-07 09:49:53 +00003700 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3701 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3702
sewardj95448072004-11-22 20:19:51 +00003703 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003704 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003705
sewardjdb67f5f2004-12-14 01:15:31 +00003706 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003707 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003708
florian537ed2d2012-08-20 16:51:39 +00003709 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003710 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003711
sewardj350e8f72012-06-25 07:52:15 +00003712 case Iop_AndV256:
3713 uifu = mkUifUV256; difd = mkDifDV256;
3714 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003715 case Iop_AndV128:
3716 uifu = mkUifUV128; difd = mkDifDV128;
3717 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003718 case Iop_And64:
3719 uifu = mkUifU64; difd = mkDifD64;
3720 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003721 case Iop_And32:
3722 uifu = mkUifU32; difd = mkDifD32;
3723 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3724 case Iop_And16:
3725 uifu = mkUifU16; difd = mkDifD16;
3726 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3727 case Iop_And8:
3728 uifu = mkUifU8; difd = mkDifD8;
3729 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3730
sewardj350e8f72012-06-25 07:52:15 +00003731 case Iop_OrV256:
3732 uifu = mkUifUV256; difd = mkDifDV256;
3733 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003734 case Iop_OrV128:
3735 uifu = mkUifUV128; difd = mkDifDV128;
3736 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003737 case Iop_Or64:
3738 uifu = mkUifU64; difd = mkDifD64;
3739 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003740 case Iop_Or32:
3741 uifu = mkUifU32; difd = mkDifD32;
3742 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3743 case Iop_Or16:
3744 uifu = mkUifU16; difd = mkDifD16;
3745 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3746 case Iop_Or8:
3747 uifu = mkUifU8; difd = mkDifD8;
3748 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3749
3750 do_And_Or:
3751 return
3752 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003753 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003754 and_or_ty,
3755 difd(mce, uifu(mce, vatom1, vatom2),
3756 difd(mce, improve(mce, atom1, vatom1),
3757 improve(mce, atom2, vatom2) ) ) );
3758
3759 case Iop_Xor8:
3760 return mkUifU8(mce, vatom1, vatom2);
3761 case Iop_Xor16:
3762 return mkUifU16(mce, vatom1, vatom2);
3763 case Iop_Xor32:
3764 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003765 case Iop_Xor64:
3766 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003767 case Iop_XorV128:
3768 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003769 case Iop_XorV256:
3770 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003771
sewardja2f30952013-03-27 11:40:02 +00003772 /* V256-bit SIMD */
3773
3774 case Iop_ShrN16x16:
3775 case Iop_ShrN32x8:
3776 case Iop_ShrN64x4:
3777 case Iop_SarN16x16:
3778 case Iop_SarN32x8:
3779 case Iop_ShlN16x16:
3780 case Iop_ShlN32x8:
3781 case Iop_ShlN64x4:
3782 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3783 this is wrong now, scalar shifts are done properly lazily.
3784 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003785 complainIfUndefined(mce, atom2, NULL);
sewardja2f30952013-03-27 11:40:02 +00003786 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
3787
3788 case Iop_QSub8Ux32:
3789 case Iop_QSub8Sx32:
3790 case Iop_Sub8x32:
3791 case Iop_Min8Ux32:
3792 case Iop_Min8Sx32:
3793 case Iop_Max8Ux32:
3794 case Iop_Max8Sx32:
3795 case Iop_CmpGT8Sx32:
3796 case Iop_CmpEQ8x32:
3797 case Iop_Avg8Ux32:
3798 case Iop_QAdd8Ux32:
3799 case Iop_QAdd8Sx32:
3800 case Iop_Add8x32:
3801 return binary8Ix32(mce, vatom1, vatom2);
3802
3803 case Iop_QSub16Ux16:
3804 case Iop_QSub16Sx16:
3805 case Iop_Sub16x16:
3806 case Iop_Mul16x16:
3807 case Iop_MulHi16Sx16:
3808 case Iop_MulHi16Ux16:
3809 case Iop_Min16Sx16:
3810 case Iop_Min16Ux16:
3811 case Iop_Max16Sx16:
3812 case Iop_Max16Ux16:
3813 case Iop_CmpGT16Sx16:
3814 case Iop_CmpEQ16x16:
3815 case Iop_Avg16Ux16:
3816 case Iop_QAdd16Ux16:
3817 case Iop_QAdd16Sx16:
3818 case Iop_Add16x16:
3819 return binary16Ix16(mce, vatom1, vatom2);
3820
3821 case Iop_Sub32x8:
3822 case Iop_CmpGT32Sx8:
3823 case Iop_CmpEQ32x8:
3824 case Iop_Add32x8:
3825 case Iop_Max32Ux8:
3826 case Iop_Max32Sx8:
3827 case Iop_Min32Ux8:
3828 case Iop_Min32Sx8:
3829 case Iop_Mul32x8:
3830 return binary32Ix8(mce, vatom1, vatom2);
3831
3832 case Iop_Sub64x4:
3833 case Iop_Add64x4:
3834 case Iop_CmpEQ64x4:
3835 case Iop_CmpGT64Sx4:
3836 return binary64Ix4(mce, vatom1, vatom2);
3837
3838 /* Perm32x8: rearrange values in left arg using steering values
3839 from right arg. So rearrange the vbits in the same way but
3840 pessimise wrt steering values. */
3841 case Iop_Perm32x8:
3842 return mkUifUV256(
3843 mce,
3844 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
3845 mkPCast32x8(mce, vatom2)
3846 );
3847
njn25e49d8e72002-09-23 09:36:25 +00003848 default:
sewardj95448072004-11-22 20:19:51 +00003849 ppIROp(op);
3850 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003851 }
njn25e49d8e72002-09-23 09:36:25 +00003852}
3853
njn25e49d8e72002-09-23 09:36:25 +00003854
/* Compute the shadow (V-bit) expression for a unary operation.
   ATOM is the original operand; its shadow is obtained via
   expr2vbits and then transformed according to OP: exact ops
   (widenings, narrowings, Not*, Reinterp*) pass or reshape the vbits
   directly, while inexact ops (FP, Clz, etc.) pessimise with a PCast
   to the result type.  Unhandled ops panic. */
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
      selection of shadow operation implicitly duplicates the logic in
      do_shadow_LoadG and should be kept in sync (in the very unlikely
      event that the interpretation of such widening ops changes in
      future).  See comment in do_shadow_LoadG. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      /* Lazy/pessimising handling of SIMD FP ops: delegate to the
         per-lane-width helpers. */
      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx8:
      case Iop_RSqrt32Fx8:
      case Iop_Recip32Fx8:
         return unary32Fx8(mce, vatom);

      case Iop_Sqrt64Fx4:
         return unary64Fx4(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Exact data-movement/duplication ops at V128: apply the same
         op to the shadow bits. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* 128-bit halving: take the matching half of the 128-bit
         shadow value. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Conversions producing a 128-bit result: pessimise the whole
         result on any undefinedness in the input. */
      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
      case Iop_I32UtoD128: /* unsigned I32 -> D128 */
      case Iop_I64UtoD128: /* unsigned I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Inexact ops with a 64-bit result: pessimise to I64. */
      case Iop_F32toF64: 
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_I32StoD64:
      case Iop_I32UtoD64:
      case Iop_ExtractExpD64:    /* D64  -> I64 */
      case Iop_ExtractExpD128:   /* D128 -> I64 */
      case Iop_ExtractSigD64:    /* D64  -> I64 */
      case Iop_ExtractSigD128:   /* D128 -> I64 */
      case Iop_DPBtoBCD:
      case Iop_BCDtoDPB:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Inexact ops with a 32-bit result: pessimise to I32. */
      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Count-trailing-zeroes needs bit-accurate shadow handling;
         done by a dedicated expensive scheme. */
      case Iop_Ctz32:
      case Iop_Ctz64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      /* Exact ops with a 64-bit result: apply the op itself to the
         shadow bits. */
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Exact ops with a 32-bit result. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      /* Exact ops with a 16-bit result. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      /* Exact ops with an 8-bit result. */
      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise NOTs neither create nor destroy
         undefinedness, so the shadow value passes through unchanged. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:
      case Iop_NotV256:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Per-lane pessimising casts for lane-local inexact ops. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      /* Narrowings: delegate to the shared V128 narrowing scheme. */
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      /* Vector widenings: delegate to the shared I64 widening scheme. */
      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise-add-long: PCast the input lanes, apply the op, then
         PCast again at the (wider) output lane size. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      /* Iop_I64UtoF32 deliberately falls into the panic: there is no
         shadow scheme for it here (yet). */
      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
4167
4168
sewardjb9e6d242013-05-11 13:42:08 +00004169/* Worker function -- do not call directly. See comments on
4170 expr2vbits_Load for the meaning of |guard|.
4171
4172 Generates IR to (1) perform a definedness test of |addr|, (2)
4173 perform a validity test of |addr|, and (3) return the Vbits for the
4174 location indicated by |addr|. All of this only happens when
4175 |guard| is NULL or |guard| evaluates to True at run time.
4176
4177 If |guard| evaluates to False at run time, the returned value is
4178 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4179 performed.
4180
4181 The definedness of |guard| itself is not checked. That is assumed
4182 to have been done before this point, by the caller. */
sewardj95448072004-11-22 20:19:51 +00004183static
sewardj2e595852005-06-30 23:33:37 +00004184IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
4185 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004186 IRAtom* addr, UInt bias, IRAtom* guard )
sewardj95448072004-11-22 20:19:51 +00004187{
4188 void* helper;
floriana5f894c2012-10-21 03:43:20 +00004189 const HChar* hname;
sewardj95448072004-11-22 20:19:51 +00004190 IRDirty* di;
4191 IRTemp datavbits;
4192 IRAtom* addrAct;
4193
4194 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00004195 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00004196
4197 /* First, emit a definedness test for the address. This also sets
4198 the address (shadow) to 'defined' following the test. */
sewardjb9e6d242013-05-11 13:42:08 +00004199 complainIfUndefined( mce, addr, guard );
sewardj95448072004-11-22 20:19:51 +00004200
4201 /* Now cook up a call to the relevant helper function, to read the
4202 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00004203 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00004204
4205 if (end == Iend_LE) {
4206 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00004207 case Ity_I64: helper = &MC_(helperc_LOADV64le);
4208 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00004209 break;
njn1d0825f2006-03-27 11:37:07 +00004210 case Ity_I32: helper = &MC_(helperc_LOADV32le);
4211 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00004212 break;
njn1d0825f2006-03-27 11:37:07 +00004213 case Ity_I16: helper = &MC_(helperc_LOADV16le);
4214 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00004215 break;
njn1d0825f2006-03-27 11:37:07 +00004216 case Ity_I8: helper = &MC_(helperc_LOADV8);
4217 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00004218 break;
4219 default: ppIRType(ty);
sewardjb9e6d242013-05-11 13:42:08 +00004220 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
sewardj2e595852005-06-30 23:33:37 +00004221 }
4222 } else {
sewardj8cf88b72005-07-08 01:29:33 +00004223 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00004224 case Ity_I64: helper = &MC_(helperc_LOADV64be);
4225 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00004226 break;
njn1d0825f2006-03-27 11:37:07 +00004227 case Ity_I32: helper = &MC_(helperc_LOADV32be);
4228 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00004229 break;
njn1d0825f2006-03-27 11:37:07 +00004230 case Ity_I16: helper = &MC_(helperc_LOADV16be);
4231 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00004232 break;
njn1d0825f2006-03-27 11:37:07 +00004233 case Ity_I8: helper = &MC_(helperc_LOADV8);
4234 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00004235 break;
4236 default: ppIRType(ty);
sewardjb9e6d242013-05-11 13:42:08 +00004237 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
sewardj8cf88b72005-07-08 01:29:33 +00004238 }
sewardj95448072004-11-22 20:19:51 +00004239 }
4240
4241 /* Generate the actual address into addrAct. */
4242 if (bias == 0) {
4243 addrAct = addr;
4244 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00004245 IROp mkAdd;
4246 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00004247 IRType tyAddr = mce->hWordTy;
4248 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00004249 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4250 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00004251 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00004252 }
4253
4254 /* We need to have a place to park the V bits we're just about to
4255 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004256 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00004257 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00004258 1/*regparms*/,
4259 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00004260 mkIRExprVec_1( addrAct ));
4261 setHelperAnns( mce, di );
sewardjcafe5052013-01-17 14:24:35 +00004262 if (guard) {
4263 di->guard = guard;
4264 /* Ideally the didn't-happen return value here would be all-ones
4265 (all-undefined), so it'd be obvious if it got used
4266 inadvertantly. We can get by with the IR-mandated default
4267 value (0b01 repeating, 0x55 etc) as that'll still look pretty
4268 undefined if it ever leaks out. */
4269 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004270 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00004271
4272 return mkexpr(datavbits);
4273}
4274
4275
sewardjcafe5052013-01-17 14:24:35 +00004276/* Generate IR to do a shadow load. The helper is expected to check
4277 the validity of the address and return the V bits for that address.
4278 This can optionally be controlled by a guard, which is assumed to
4279 be True if NULL. In the case where the guard is False at runtime,
sewardjb9e6d242013-05-11 13:42:08 +00004280 the helper will return the didn't-do-the-call value of 0x55..55.
4281 Since that means "completely undefined result", the caller of
sewardjcafe5052013-01-17 14:24:35 +00004282 this function will need to fix up the result somehow in that
4283 case.
sewardjb9e6d242013-05-11 13:42:08 +00004284
4285 Caller of this function is also expected to have checked the
4286 definedness of |guard| before this point.
sewardjcafe5052013-01-17 14:24:35 +00004287*/
sewardj95448072004-11-22 20:19:51 +00004288static
sewardj2e595852005-06-30 23:33:37 +00004289IRAtom* expr2vbits_Load ( MCEnv* mce,
4290 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004291 IRAtom* addr, UInt bias,
4292 IRAtom* guard )
sewardj170ee212004-12-10 18:57:51 +00004293{
sewardj2e595852005-06-30 23:33:37 +00004294 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00004295 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00004296 case Ity_I8:
4297 case Ity_I16:
4298 case Ity_I32:
4299 case Ity_I64:
sewardjcafe5052013-01-17 14:24:35 +00004300 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
sewardj45fa9f42012-05-21 10:18:10 +00004301 case Ity_V128: {
4302 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00004303 if (end == Iend_LE) {
sewardjcafe5052013-01-17 14:24:35 +00004304 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4305 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
sewardj2e595852005-06-30 23:33:37 +00004306 } else {
sewardjcafe5052013-01-17 14:24:35 +00004307 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4308 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
sewardj2e595852005-06-30 23:33:37 +00004309 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004310 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00004311 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00004312 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj45fa9f42012-05-21 10:18:10 +00004313 }
4314 case Ity_V256: {
4315 /* V256-bit case -- phrased in terms of 64 bit units (Qs),
4316 with Q3 being the most significant lane. */
4317 if (end == Iend_BE) goto unhandled;
sewardjcafe5052013-01-17 14:24:35 +00004318 IRAtom* v64Q0
4319 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0, guard);
4320 IRAtom* v64Q1
4321 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8, guard);
4322 IRAtom* v64Q2
4323 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16, guard);
4324 IRAtom* v64Q3
4325 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24, guard);
sewardj45fa9f42012-05-21 10:18:10 +00004326 return assignNew( 'V', mce,
4327 Ity_V256,
4328 IRExpr_Qop(Iop_64x4toV256,
4329 v64Q3, v64Q2, v64Q1, v64Q0));
4330 }
4331 unhandled:
sewardj170ee212004-12-10 18:57:51 +00004332 default:
sewardj2e595852005-06-30 23:33:37 +00004333 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00004334 }
4335}
4336
4337
sewardjcafe5052013-01-17 14:24:35 +00004338/* The most general handler for guarded loads. Assumes the
sewardjb9e6d242013-05-11 13:42:08 +00004339 definedness of GUARD has already been checked by the caller. A
4340 GUARD of NULL is assumed to mean "always True". Generates code to
4341 check the definedness and validity of ADDR.
sewardjcafe5052013-01-17 14:24:35 +00004342
4343 Generate IR to do a shadow load from ADDR and return the V bits.
4344 The loaded type is TY. The loaded data is then (shadow) widened by
4345 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4346 evaluates to False at run time then the returned Vbits are simply
4347 VALT instead. Note therefore that the argument type of VWIDEN must
4348 be TY and the result type of VWIDEN must equal the type of VALT.
4349*/
florian434ffae2012-07-19 17:23:42 +00004350static
sewardjcafe5052013-01-17 14:24:35 +00004351IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4352 IREndness end, IRType ty,
4353 IRAtom* addr, UInt bias,
4354 IRAtom* guard,
4355 IROp vwiden, IRAtom* valt )
florian434ffae2012-07-19 17:23:42 +00004356{
sewardjcafe5052013-01-17 14:24:35 +00004357 /* Sanity check the conversion operation, and also set TYWIDE. */
4358 IRType tyWide = Ity_INVALID;
4359 switch (vwiden) {
4360 case Iop_INVALID:
4361 tyWide = ty;
4362 break;
4363 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4364 tyWide = Ity_I32;
4365 break;
4366 default:
4367 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
florian434ffae2012-07-19 17:23:42 +00004368 }
4369
sewardjcafe5052013-01-17 14:24:35 +00004370 /* If the guard evaluates to True, this will hold the loaded V bits
4371 at TY. If the guard evaluates to False, this will be all
4372 ones, meaning "all undefined", in which case we will have to
florian5686b2d2013-01-29 03:57:40 +00004373 replace it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00004374 IRAtom* iftrue1
4375 = assignNew('V', mce, ty,
4376 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4377 /* Now (shadow-) widen the loaded V bits to the desired width. In
4378 the guard-is-False case, the allowable widening operators will
4379 in the worst case (unsigned widening) at least leave the
4380 pre-widened part as being marked all-undefined, and in the best
4381 case (signed widening) mark the whole widened result as
4382 undefined. Anyway, it doesn't matter really, since in this case
florian5686b2d2013-01-29 03:57:40 +00004383 we will replace said value with the default value |valt| using an
4384 ITE. */
sewardjcafe5052013-01-17 14:24:35 +00004385 IRAtom* iftrue2
4386 = vwiden == Iop_INVALID
4387 ? iftrue1
4388 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4389 /* These are the V bits we will return if the load doesn't take
4390 place. */
4391 IRAtom* iffalse
4392 = valt;
florian5686b2d2013-01-29 03:57:40 +00004393 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00004394 something that iropt knows how to fold out later. */
4395 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00004396 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00004397 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00004398 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00004399}
4400
4401
4402/* A simpler handler for guarded loads, in which there is no
4403 conversion operation, and the default V bit return (when the guard
4404 evaluates to False at runtime) is "all defined". If there is no
4405 guard expression or the guard is always TRUE this function behaves
sewardjb9e6d242013-05-11 13:42:08 +00004406 like expr2vbits_Load. It is assumed that definedness of GUARD has
4407 already been checked at the call site. */
sewardjcafe5052013-01-17 14:24:35 +00004408static
4409IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4410 IREndness end, IRType ty,
4411 IRAtom* addr, UInt bias,
4412 IRAtom *guard )
4413{
4414 return expr2vbits_Load_guarded_General(
4415 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4416 );
florian434ffae2012-07-19 17:23:42 +00004417}
4418
4419
sewardj170ee212004-12-10 18:57:51 +00004420static
florian5686b2d2013-01-29 03:57:40 +00004421IRAtom* expr2vbits_ITE ( MCEnv* mce,
4422 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
sewardj95448072004-11-22 20:19:51 +00004423{
florian5686b2d2013-01-29 03:57:40 +00004424 IRAtom *vbitsC, *vbits0, *vbits1;
sewardj95448072004-11-22 20:19:51 +00004425 IRType ty;
sewardj07bfda22013-01-29 21:11:55 +00004426 /* Given ITE(cond, iftrue, iffalse), generate
4427 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
sewardj95448072004-11-22 20:19:51 +00004428 That is, steer the V bits like the originals, but trash the
4429 result if the steering value is undefined. This gives
4430 lazy propagation. */
4431 tl_assert(isOriginalAtom(mce, cond));
florian5686b2d2013-01-29 03:57:40 +00004432 tl_assert(isOriginalAtom(mce, iftrue));
4433 tl_assert(isOriginalAtom(mce, iffalse));
sewardj95448072004-11-22 20:19:51 +00004434
4435 vbitsC = expr2vbits(mce, cond);
florian5686b2d2013-01-29 03:57:40 +00004436 vbits1 = expr2vbits(mce, iftrue);
sewardj07bfda22013-01-29 21:11:55 +00004437 vbits0 = expr2vbits(mce, iffalse);
sewardj1c0ce7a2009-07-01 08:10:49 +00004438 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004439
4440 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004441 mkUifU(mce, ty, assignNew('V', mce, ty,
florian5686b2d2013-01-29 03:57:40 +00004442 IRExpr_ITE(cond, vbits1, vbits0)),
sewardj95448072004-11-22 20:19:51 +00004443 mkPCastTo(mce, ty, vbitsC) );
4444}
4445
4446/* --------- This is the main expression-handling function. --------- */
4447
4448static
4449IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4450{
4451 switch (e->tag) {
4452
4453 case Iex_Get:
4454 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4455
4456 case Iex_GetI:
4457 return shadow_GETI( mce, e->Iex.GetI.descr,
4458 e->Iex.GetI.ix, e->Iex.GetI.bias );
4459
sewardj0b9d74a2006-12-24 02:24:11 +00004460 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004461 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004462
4463 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004464 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004465
sewardje91cea72006-02-08 19:32:02 +00004466 case Iex_Qop:
4467 return expr2vbits_Qop(
4468 mce,
floriane2ab2972012-06-01 20:43:03 +00004469 e->Iex.Qop.details->op,
4470 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4471 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004472 );
4473
sewardjed69fdb2006-02-03 16:12:27 +00004474 case Iex_Triop:
4475 return expr2vbits_Triop(
4476 mce,
florian26441742012-06-02 20:30:41 +00004477 e->Iex.Triop.details->op,
4478 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4479 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004480 );
4481
sewardj95448072004-11-22 20:19:51 +00004482 case Iex_Binop:
4483 return expr2vbits_Binop(
4484 mce,
4485 e->Iex.Binop.op,
4486 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4487 );
4488
4489 case Iex_Unop:
4490 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4491
sewardj2e595852005-06-30 23:33:37 +00004492 case Iex_Load:
4493 return expr2vbits_Load( mce, e->Iex.Load.end,
4494 e->Iex.Load.ty,
sewardjcafe5052013-01-17 14:24:35 +00004495 e->Iex.Load.addr, 0/*addr bias*/,
4496 NULL/* guard == "always True"*/ );
sewardj95448072004-11-22 20:19:51 +00004497
4498 case Iex_CCall:
4499 return mkLazyN( mce, e->Iex.CCall.args,
4500 e->Iex.CCall.retty,
4501 e->Iex.CCall.cee );
4502
florian5686b2d2013-01-29 03:57:40 +00004503 case Iex_ITE:
4504 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
sewardj07bfda22013-01-29 21:11:55 +00004505 e->Iex.ITE.iffalse);
njn25e49d8e72002-09-23 09:36:25 +00004506
4507 default:
sewardj95448072004-11-22 20:19:51 +00004508 VG_(printf)("\n");
4509 ppIRExpr(e);
4510 VG_(printf)("\n");
4511 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004512 }
njn25e49d8e72002-09-23 09:36:25 +00004513}
4514
4515/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004516/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004517/*------------------------------------------------------------*/
4518
sewardj95448072004-11-22 20:19:51 +00004519/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004520
4521static
sewardj95448072004-11-22 20:19:51 +00004522IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004523{
sewardj7cf97ee2004-11-28 14:25:01 +00004524 IRType ty, tyH;
4525
sewardj95448072004-11-22 20:19:51 +00004526 /* vatom is vbits-value and as such can only have a shadow type. */
4527 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004528
sewardj1c0ce7a2009-07-01 08:10:49 +00004529 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004530 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004531
sewardj95448072004-11-22 20:19:51 +00004532 if (tyH == Ity_I32) {
4533 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004534 case Ity_I32:
4535 return vatom;
4536 case Ity_I16:
4537 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4538 case Ity_I8:
4539 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4540 default:
4541 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004542 }
sewardj6cf40ff2005-04-20 22:31:26 +00004543 } else
4544 if (tyH == Ity_I64) {
4545 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004546 case Ity_I32:
4547 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4548 case Ity_I16:
4549 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4550 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4551 case Ity_I8:
4552 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4553 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4554 default:
4555 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004556 }
sewardj95448072004-11-22 20:19:51 +00004557 } else {
4558 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004559 }
sewardj95448072004-11-22 20:19:51 +00004560 unhandled:
4561 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4562 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004563}
4564
njn25e49d8e72002-09-23 09:36:25 +00004565
/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary.
*/
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp mkAdd;
   IRType ty, tyAddr;
   void* helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;

   /* Addresses are host-word-sized; pick the matching add op. */
   tyAddr = mce->hWordTy;
   mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of |data| (original) and |vdata| (V bits) must be
      supplied.  Given an original, compute its V bits here. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         // NOTE(review): the vector cases use narrower *_DEFINED
         // constants than the scalar ones; presumably IRConst_V128 /
         // IRConst_V256 take per-byte bitmask arguments (16 and 32 bits
         // respectively) -- confirm against libvex_ir.h.
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory.  Vector types share the 64-bit helper,
      invoked once per 64-bit lane. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      /* Lane memory order depends on endianness. */
      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      /* For each lane: compute its address, extract its 64 V bits,
         and build a (possibly guarded) dirty call to the helper. */
      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      /* All four lane stores share the same guard. */
      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      /* Low 64-bit lane. */
      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      /* High 64-bit lane. */
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to the host word size before
            being passed to the helper. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00004826
njn25e49d8e72002-09-23 09:36:25 +00004827
sewardj95448072004-11-22 20:19:51 +00004828/* Do lazy pessimistic propagation through a dirty helper call, by
4829 looking at the annotations on it. This is the most complex part of
4830 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004831
sewardj95448072004-11-22 20:19:51 +00004832static IRType szToITy ( Int n )
4833{
4834 switch (n) {
4835 case 1: return Ity_I8;
4836 case 2: return Ity_I16;
4837 case 4: return Ity_I32;
4838 case 8: return Ity_I64;
4839 default: VG_(tool_panic)("szToITy(memcheck)");
4840 }
4841}
njn25e49d8e72002-09-23 09:36:25 +00004842
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   /* Instrument a dirty helper call: PCast all its inputs (args,
      guest state reads, memory reads) down into one 32-bit summary
      V-value 'curr', then write a suitably-cast copy of 'curr' to
      every output (result temp, guest state writes, memory writes).
      This implements lazy pessimistic propagation through the call. */
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg -- the helper declared it as always defined */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                           NULL, /* original atom */
                           mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
5059
sewardj1c0ce7a2009-07-01 08:10:49 +00005060
sewardj826ec492005-05-12 18:05:00 +00005061/* We have an ABI hint telling us that [base .. base+len-1] is to
5062 become undefined ("writable"). Generate code to call a helper to
5063 notify the A/V bit machinery of this fact.
5064
5065 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00005066 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5067 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00005068*/
5069static
sewardj7cf4e6b2008-05-01 20:24:26 +00005070void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00005071{
5072 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00005073 /* Minor optimisation: if not doing origin tracking, ignore the
5074 supplied nia and pass zero instead. This is on the basis that
5075 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5076 almost always generate a shorter instruction to put zero into a
5077 register than any other value. */
5078 if (MC_(clo_mc_level) < 3)
5079 nia = mkIRExpr_HWord(0);
5080
sewardj826ec492005-05-12 18:05:00 +00005081 di = unsafeIRDirty_0_N(
5082 0/*regparms*/,
5083 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00005084 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00005085 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00005086 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005087 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00005088}
5089
njn25e49d8e72002-09-23 09:36:25 +00005090
sewardj1c0ce7a2009-07-01 08:10:49 +00005091/* ------ Dealing with IRCAS (big and complex) ------ */
5092
5093/* FWDS */
5094static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5095 IRAtom* baseaddr, Int offset );
5096static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5097static void gen_store_b ( MCEnv* mce, Int szB,
5098 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5099 IRAtom* guard );
5100
5101static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5102static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5103
5104
5105/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5106 IRExpr.Consts, else this asserts. If they are both Consts, it
5107 doesn't do anything. So that just leaves the RdTmp case.
5108
5109 In which case: this assigns the shadow value SHADOW to the IR
5110 shadow temporary associated with ORIG. That is, ORIG, being an
5111 original temporary, will have a shadow temporary associated with
5112 it. However, in the case envisaged here, there will so far have
5113 been no IR emitted to actually write a shadow value into that
5114 temporary. What this routine does is to (emit IR to) copy the
5115 value in SHADOW into said temporary, so that after this call,
5116 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5117 value in SHADOW.
5118
5119 Point is to allow callers to compute "by hand" a shadow value for
5120 ORIG, and force it to be associated with ORIG.
5121
5122 How do we know that that shadow associated with ORIG has not so far
5123 been assigned to? Well, we don't per se know that, but supposing
5124 it had. Then this routine would create a second assignment to it,
5125 and later the IR sanity checker would barf. But that never
5126 happens. QED.
5127*/
5128static void bind_shadow_tmp_to_orig ( UChar how,
5129 MCEnv* mce,
5130 IRAtom* orig, IRAtom* shadow )
5131{
5132 tl_assert(isOriginalAtom(mce, orig));
5133 tl_assert(isShadowAtom(mce, shadow));
5134 switch (orig->tag) {
5135 case Iex_Const:
5136 tl_assert(shadow->tag == Iex_Const);
5137 break;
5138 case Iex_RdTmp:
5139 tl_assert(shadow->tag == Iex_RdTmp);
5140 if (how == 'V') {
5141 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5142 shadow);
5143 } else {
5144 tl_assert(how == 'B');
5145 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5146 shadow);
5147 }
5148 break;
5149 default:
5150 tl_assert(0);
5151 }
5152}
5153
5154
/* Instrument an IRCAS statement.  This is only a dispatcher: the
   real work is done by do_shadow_CAS_single / do_shadow_CAS_double,
   selected on whether the CAS is single- or double-element (oldHi
   unused vs used).  The long comment below documents the common
   instrumentation scheme and the rationale for Iop_CasCmpEQ. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

         1. fetch data#,dataB (the proposed new value)

         2. fetch expd#,expdB (what we expect to see at the address)

         3. check definedness of address

         4. load old#,oldB from shadow memory; this also checks
            addressibility of the address

         5. the CAS itself

         6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

         7. if "expected == old" (as computed by (6))
               store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
5287
5288
/* Instrument a single-element CAS.  Follows steps 1..7 of the scheme
   documented in do_shadow_CAS: fetch shadows for the proposed data
   and the expected value, load the shadow of the memory location
   (which also checks address definedness/addressibility), emit the
   real CAS, then conditionally update shadow memory iff the CAS
   succeeded.  The emission order of these statements is significant.
   Handles both the V-bit shadow and, when origin tracking is on
   (mc_level >= 3), the B (origin) shadow. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5377
5378
/* Instrument a double-element CAS (both oldHi and oldLo in use).
   Same steps 1..7 as do_shadow_CAS_single (see the scheme comment in
   do_shadow_CAS), but everything is done twice, once per element.
   The Hi/Lo memory offsets depend on endianness, computed below.
   The "expected == old" test for step 6 is built from XOR/OR of both
   halves, so a single Iop_CasCmpEQ against zero covers the pair.
   Statement emission order is significant. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5539
5540
sewardjdb5907d2009-11-26 17:20:21 +00005541/* ------ Dealing with LL/SC (not difficult) ------ */
5542
5543static void do_shadow_LLSC ( MCEnv* mce,
5544 IREndness stEnd,
5545 IRTemp stResult,
5546 IRExpr* stAddr,
5547 IRExpr* stStoredata )
5548{
5549 /* In short: treat a load-linked like a normal load followed by an
5550 assignment of the loaded (shadow) data to the result temporary.
5551 Treat a store-conditional like a normal store, and mark the
5552 result temporary as defined. */
5553 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5554 IRTemp resTmp = findShadowTmpV(mce, stResult);
5555
5556 tl_assert(isIRAtom(stAddr));
5557 if (stStoredata)
5558 tl_assert(isIRAtom(stStoredata));
5559
5560 if (stStoredata == NULL) {
5561 /* Load Linked */
5562 /* Just treat this as a normal load, followed by an assignment of
5563 the value to .result. */
5564 /* Stay sane */
5565 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5566 || resTy == Ity_I16 || resTy == Ity_I8);
5567 assign( 'V', mce, resTmp,
5568 expr2vbits_Load(
sewardjcafe5052013-01-17 14:24:35 +00005569 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
5570 NULL/*always happens*/) );
sewardjdb5907d2009-11-26 17:20:21 +00005571 } else {
5572 /* Store Conditional */
5573 /* Stay sane */
5574 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5575 stStoredata);
5576 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5577 || dataTy == Ity_I16 || dataTy == Ity_I8);
5578 do_shadow_Store( mce, stEnd,
5579 stAddr, 0/* addr bias */,
5580 stStoredata,
5581 NULL /* shadow data */,
5582 NULL/*guard*/ );
5583 /* This is a store conditional, so it writes to .result a value
5584 indicating whether or not the store succeeded. Just claim
5585 this value is always defined. In the PowerPC interpretation
5586 of store-conditional, definedness of the success indication
5587 depends on whether the address of the store matches the
5588 reservation address. But we can't tell that here (and
5589 anyway, we're not being PowerPC-specific). At least we are
5590 guaranteed that the definedness of the store address, and its
5591 addressibility, will be checked as per normal. So it seems
5592 pretty safe to just say that the success indication is always
5593 defined.
5594
5595 In schemeS, for origin tracking, we must correspondingly set
5596 a no-origin value for the origin shadow of .result.
5597 */
5598 tl_assert(resTy == Ity_I1);
5599 assign( 'V', mce, resTmp, definedOfType(resTy) );
5600 }
5601}
5602
5603
sewardjcafe5052013-01-17 14:24:35 +00005604/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
5605
5606static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
5607{
sewardjb9e6d242013-05-11 13:42:08 +00005608 complainIfUndefined(mce, sg->guard, NULL);
5609 /* do_shadow_Store will generate code to check the definedness and
5610 validity of sg->addr, in the case where sg->guard evaluates to
5611 True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005612 do_shadow_Store( mce, sg->end,
5613 sg->addr, 0/* addr bias */,
5614 sg->data,
5615 NULL /* shadow data */,
5616 sg->guard );
5617}
5618
5619static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
5620{
sewardjb9e6d242013-05-11 13:42:08 +00005621 complainIfUndefined(mce, lg->guard, NULL);
5622 /* expr2vbits_Load_guarded_General will generate code to check the
5623 definedness and validity of lg->addr, in the case where
5624 lg->guard evaluates to True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005625
5626 /* Look at the LoadG's built-in conversion operation, to determine
5627 the source (actual loaded data) type, and the equivalent IROp.
5628 NOTE that implicitly we are taking a widening operation to be
5629 applied to original atoms and producing one that applies to V
5630 bits. Since signed and unsigned widening are self-shadowing,
5631 this is a straight copy of the op (modulo swapping from the
5632 IRLoadGOp form to the IROp form). Note also therefore that this
5633 implicitly duplicates the logic to do with said widening ops in
5634 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
5635 IROp vwiden = Iop_INVALID;
5636 IRType loadedTy = Ity_INVALID;
5637 switch (lg->cvt) {
5638 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
5639 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
5640 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
5641 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
5642 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
5643 default: VG_(tool_panic)("do_shadow_LoadG");
5644 }
5645
5646 IRAtom* vbits_alt
5647 = expr2vbits( mce, lg->alt );
5648 IRAtom* vbits_final
5649 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
5650 lg->addr, 0/*addr bias*/,
5651 lg->guard, vwiden, vbits_alt );
5652 /* And finally, bind the V bits to the destination temporary. */
5653 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
5654}
5655
5656
sewardj95448072004-11-22 20:19:51 +00005657/*------------------------------------------------------------*/
5658/*--- Memcheck main ---*/
5659/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005660
sewardj7cf4e6b2008-05-01 20:24:26 +00005661static void schemeS ( MCEnv* mce, IRStmt* st );
5662
sewardj95448072004-11-22 20:19:51 +00005663static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005664{
sewardj95448072004-11-22 20:19:51 +00005665 ULong n = 0;
5666 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005667 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005668 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005669 return False;
5670 tl_assert(at->tag == Iex_Const);
5671 con = at->Iex.Const.con;
5672 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005673 case Ico_U1: return False;
5674 case Ico_U8: n = (ULong)con->Ico.U8; break;
5675 case Ico_U16: n = (ULong)con->Ico.U16; break;
5676 case Ico_U32: n = (ULong)con->Ico.U32; break;
5677 case Ico_U64: n = (ULong)con->Ico.U64; break;
5678 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005679 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005680 case Ico_F64i: return False;
5681 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00005682 default: ppIRExpr(at); tl_assert(0);
5683 }
5684 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005685 return (/*32*/ n == 0xFEFEFEFFULL
5686 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005687 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00005688 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005689 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005690 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005691 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005692 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005693 );
sewardj95448072004-11-22 20:19:51 +00005694}
njn25e49d8e72002-09-23 09:36:25 +00005695
sewardj95448072004-11-22 20:19:51 +00005696static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5697{
sewardjd5204dc2004-12-31 01:16:11 +00005698 Int i;
5699 IRExpr* e;
5700 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00005701 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00005702 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00005703 case Ist_WrTmp:
5704 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00005705 switch (e->tag) {
5706 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00005707 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00005708 return False;
sewardjd5204dc2004-12-31 01:16:11 +00005709 case Iex_Const:
5710 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00005711 case Iex_Unop:
5712 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00005713 case Iex_GetI:
5714 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00005715 case Iex_Binop:
5716 return isBogusAtom(e->Iex.Binop.arg1)
5717 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00005718 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00005719 return isBogusAtom(e->Iex.Triop.details->arg1)
5720 || isBogusAtom(e->Iex.Triop.details->arg2)
5721 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00005722 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00005723 return isBogusAtom(e->Iex.Qop.details->arg1)
5724 || isBogusAtom(e->Iex.Qop.details->arg2)
5725 || isBogusAtom(e->Iex.Qop.details->arg3)
5726 || isBogusAtom(e->Iex.Qop.details->arg4);
florian5686b2d2013-01-29 03:57:40 +00005727 case Iex_ITE:
5728 return isBogusAtom(e->Iex.ITE.cond)
5729 || isBogusAtom(e->Iex.ITE.iftrue)
5730 || isBogusAtom(e->Iex.ITE.iffalse);
sewardj2e595852005-06-30 23:33:37 +00005731 case Iex_Load:
5732 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00005733 case Iex_CCall:
5734 for (i = 0; e->Iex.CCall.args[i]; i++)
5735 if (isBogusAtom(e->Iex.CCall.args[i]))
5736 return True;
5737 return False;
5738 default:
5739 goto unhandled;
5740 }
sewardjd5204dc2004-12-31 01:16:11 +00005741 case Ist_Dirty:
5742 d = st->Ist.Dirty.details;
5743 for (i = 0; d->args[i]; i++)
5744 if (isBogusAtom(d->args[i]))
5745 return True;
florian6c0aa2c2013-01-21 01:27:22 +00005746 if (isBogusAtom(d->guard))
sewardjd5204dc2004-12-31 01:16:11 +00005747 return True;
5748 if (d->mAddr && isBogusAtom(d->mAddr))
5749 return True;
5750 return False;
sewardj95448072004-11-22 20:19:51 +00005751 case Ist_Put:
5752 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00005753 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005754 return isBogusAtom(st->Ist.PutI.details->ix)
5755 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00005756 case Ist_Store:
5757 return isBogusAtom(st->Ist.Store.addr)
5758 || isBogusAtom(st->Ist.Store.data);
sewardjcafe5052013-01-17 14:24:35 +00005759 case Ist_StoreG: {
5760 IRStoreG* sg = st->Ist.StoreG.details;
5761 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
5762 || isBogusAtom(sg->guard);
5763 }
5764 case Ist_LoadG: {
5765 IRLoadG* lg = st->Ist.LoadG.details;
5766 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
5767 || isBogusAtom(lg->guard);
5768 }
sewardj95448072004-11-22 20:19:51 +00005769 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00005770 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00005771 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005772 return isBogusAtom(st->Ist.AbiHint.base)
5773 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00005774 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00005775 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00005776 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005777 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005778 case Ist_CAS:
5779 cas = st->Ist.CAS.details;
5780 return isBogusAtom(cas->addr)
5781 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5782 || isBogusAtom(cas->expdLo)
5783 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5784 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00005785 case Ist_LLSC:
5786 return isBogusAtom(st->Ist.LLSC.addr)
5787 || (st->Ist.LLSC.storedata
5788 ? isBogusAtom(st->Ist.LLSC.storedata)
5789 : False);
sewardj95448072004-11-22 20:19:51 +00005790 default:
5791 unhandled:
5792 ppIRStmt(st);
5793 VG_(tool_panic)("hasBogusLiterals");
5794 }
5795}
njn25e49d8e72002-09-23 09:36:25 +00005796
njn25e49d8e72002-09-23 09:36:25 +00005797
/* Top-level instrumentation entry point for Memcheck.  Takes the flat
   superblock SB_IN and returns a fresh superblock in which each
   original statement is preceded by IR that maintains/checks
   definedness (V-bit) shadow state and, when MC_(clo_mc_level) == 3,
   origin-tracking (B-bit) shadow state as well.

   NOTE(review): 'closure', 'vge' and 'archinfo_host' are accepted but
   not referenced anywhere in this function body. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: sb_out is a copy of sb_in, minus its statements. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
      Darwin.  10.7 is mostly built with LLVM, which uses these for
      bitfield inserts, and we get a lot of false errors if the cheap
      interpretation is used, alas.  Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   /* One tmpMap entry per original temp; shadows allocated lazily. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in sb_out,
         for the debug-printing block below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_StoreG:
            do_shadow_StoreG( &mce, st->Ist.StoreG.details );
            break;

         case Ist_LoadG:
            do_shadow_LoadG( &mce, st->Ist.LoadG.details );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next, NULL );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00006105
sewardj81651dc2007-08-28 06:05:20 +00006106/*------------------------------------------------------------*/
6107/*--- Post-tree-build final tidying ---*/
6108/*------------------------------------------------------------*/
6109
6110/* This exploits the observation that Memcheck often produces
6111 repeated conditional calls of the form
6112
sewardj7cf4e6b2008-05-01 20:24:26 +00006113 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00006114
6115 with the same guard expression G guarding the same helper call.
6116 The second and subsequent calls are redundant. This usually
6117 results from instrumentation of guest code containing multiple
6118 memory references at different constant offsets from the same base
6119 register. After optimisation of the instrumentation, you get a
6120 test for the definedness of the base register for each memory
6121 reference, which is kinda pointless. MC_(final_tidy) therefore
6122 looks for such repeated calls and removes all but the first. */
6123
6124/* A struct for recording which (helper, guard) pairs we have already
6125 seen. */
6126typedef
6127 struct { void* entry; IRExpr* guard; }
6128 Pair;
6129
6130/* Return True if e1 and e2 definitely denote the same value (used to
6131 compare guards). Return False if unknown; False is the safe
6132 answer. Since guest registers and guest memory do not have the
6133 SSA property we must return False if any Gets or Loads appear in
6134 the expression. */
6135
6136static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6137{
6138 if (e1->tag != e2->tag)
6139 return False;
6140 switch (e1->tag) {
6141 case Iex_Const:
6142 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6143 case Iex_Binop:
6144 return e1->Iex.Binop.op == e2->Iex.Binop.op
6145 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6146 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6147 case Iex_Unop:
6148 return e1->Iex.Unop.op == e2->Iex.Unop.op
6149 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6150 case Iex_RdTmp:
6151 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
florian5686b2d2013-01-29 03:57:40 +00006152 case Iex_ITE:
6153 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6154 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6155 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
sewardj81651dc2007-08-28 06:05:20 +00006156 case Iex_Qop:
6157 case Iex_Triop:
6158 case Iex_CCall:
6159 /* be lazy. Could define equality for these, but they never
6160 appear to be used. */
6161 return False;
6162 case Iex_Get:
6163 case Iex_GetI:
6164 case Iex_Load:
6165 /* be conservative - these may not give the same value each
6166 time */
6167 return False;
6168 case Iex_Binder:
6169 /* should never see this */
6170 /* fallthrough */
6171 default:
6172 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6173 ppIRExpr(e1);
6174 VG_(tool_panic)("memcheck:sameIRValue");
6175 return False;
6176 }
6177}
6178
6179/* See if 'pairs' already has an entry for (entry, guard). Return
6180 True if so. If not, add an entry. */
6181
6182static
6183Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6184{
6185 Pair p;
6186 Pair* pp;
6187 Int i, n = VG_(sizeXA)( pairs );
6188 for (i = 0; i < n; i++) {
6189 pp = VG_(indexXA)( pairs, i );
6190 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6191 return True;
6192 }
6193 p.guard = guard;
6194 p.entry = entry;
6195 VG_(addToXA)( pairs, &p );
6196 return False;
6197}
6198
florian11f3cc82012-10-21 02:19:35 +00006199static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00006200{
6201 return
sewardj7cf4e6b2008-05-01 20:24:26 +00006202 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6203 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6204 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6205 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6206 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6207 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6208 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6209 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00006210}
6211
6212IRSB* MC_(final_tidy) ( IRSB* sb_in )
6213{
6214 Int i;
6215 IRStmt* st;
6216 IRDirty* di;
6217 IRExpr* guard;
6218 IRCallee* cee;
6219 Bool alreadyPresent;
sewardj9c606bd2008-09-18 18:12:50 +00006220 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
6221 VG_(free), sizeof(Pair) );
sewardj81651dc2007-08-28 06:05:20 +00006222 /* Scan forwards through the statements. Each time a call to one
6223 of the relevant helpers is seen, check if we have made a
6224 previous call to the same helper using the same guard
6225 expression, and if so, delete the call. */
6226 for (i = 0; i < sb_in->stmts_used; i++) {
6227 st = sb_in->stmts[i];
6228 tl_assert(st);
6229 if (st->tag != Ist_Dirty)
6230 continue;
6231 di = st->Ist.Dirty.details;
6232 guard = di->guard;
florian6c0aa2c2013-01-21 01:27:22 +00006233 tl_assert(guard);
sewardj81651dc2007-08-28 06:05:20 +00006234 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
6235 cee = di->cee;
6236 if (!is_helperc_value_checkN_fail( cee->name ))
6237 continue;
6238 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
6239 guard 'guard'. Check if we have already seen a call to this
6240 function with the same guard. If so, delete it. If not,
6241 add it to the set of calls we do know about. */
6242 alreadyPresent = check_or_add( pairs, guard, cee->addr );
6243 if (alreadyPresent) {
6244 sb_in->stmts[i] = IRStmt_NoOp();
6245 if (0) VG_(printf)("XX\n");
6246 }
6247 }
6248 VG_(deleteXA)( pairs );
6249 return sb_in;
6250}
6251
6252
sewardj7cf4e6b2008-05-01 20:24:26 +00006253/*------------------------------------------------------------*/
6254/*--- Origin tracking stuff ---*/
6255/*------------------------------------------------------------*/
6256
sewardj1c0ce7a2009-07-01 08:10:49 +00006257/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00006258static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6259{
sewardj1c0ce7a2009-07-01 08:10:49 +00006260 TempMapEnt* ent;
6261 /* VG_(indexXA) range-checks 'orig', hence no need to check
6262 here. */
6263 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6264 tl_assert(ent->kind == Orig);
6265 if (ent->shadowB == IRTemp_INVALID) {
6266 IRTemp tmpB
6267 = newTemp( mce, Ity_I32, BSh );
6268 /* newTemp may cause mce->tmpMap to resize, hence previous results
6269 from VG_(indexXA) are invalid. */
6270 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6271 tl_assert(ent->kind == Orig);
6272 tl_assert(ent->shadowB == IRTemp_INVALID);
6273 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00006274 }
sewardj1c0ce7a2009-07-01 08:10:49 +00006275 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00006276}
6277
6278static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6279{
6280 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6281}
6282
sewardjcafe5052013-01-17 14:24:35 +00006283
6284/* Make a guarded origin load, with no special handling in the
6285 didn't-happen case. A GUARD of NULL is assumed to mean "always
6286 True".
6287
6288 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6289 return the otag. The loaded size is SZB. If GUARD evaluates to
6290 False at run time then the returned otag is zero.
6291*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*        hFun;    /* helper entry point */
   const HChar* hName;   /* helper name, for IR printing */
   IRTemp       bTmp;    /* receives the helper's result */
   IRDirty*     di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   /* Fold the constant offset into the effective address, if any. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* The helper returns a host word; narrowed back to I32 below on
      64-bit hosts. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the load helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertantly.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host: narrow the I64 helper result to the I32 otag. */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host: the helper result is already I32. */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00006359
sewardjcafe5052013-01-17 14:24:35 +00006360
6361/* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6362 loaded size is SZB. The load is regarded as unconditional (always
6363 happens).
6364*/
6365static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6366 Int offset )
florian434ffae2012-07-19 17:23:42 +00006367{
sewardjcafe5052013-01-17 14:24:35 +00006368 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
florian434ffae2012-07-19 17:23:42 +00006369}
6370
sewardjcafe5052013-01-17 14:24:35 +00006371
6372/* The most general handler for guarded origin loads. A GUARD of NULL
6373 is assumed to mean "always True".
6374
6375 Generate IR to do a shadow origin load from ADDR+BIAS and return
6376 the B bits. The loaded type is TY. If GUARD evaluates to False at
6377 run time then the returned B bits are simply BALT instead.
6378*/
6379static
6380IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6381 IRType ty,
6382 IRAtom* addr, UInt bias,
6383 IRAtom* guard, IRAtom* balt )
6384{
6385 /* If the guard evaluates to True, this will hold the loaded
6386 origin. If the guard evaluates to False, this will be zero,
6387 meaning "unknown origin", in which case we will have to replace
florian5686b2d2013-01-29 03:57:40 +00006388 it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00006389 IRAtom* iftrue
6390 = assignNew('B', mce, Ity_I32,
6391 gen_guarded_load_b(mce, sizeofIRType(ty),
6392 addr, bias, guard));
6393 /* These are the bits we will return if the load doesn't take
6394 place. */
6395 IRAtom* iffalse
6396 = balt;
florian5686b2d2013-01-29 03:57:40 +00006397 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00006398 something that iropt knows how to fold out later. */
6399 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00006400 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00006401 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00006402 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00006403}
6404
6405
6406/* Generate a shadow origins store. guard :: Ity_I1 controls whether
6407 the store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*        hFun;    /* helper entry point */
   const HChar* hName;   /* helper name, for IR printing */
   IRDirty*     di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   /* Fold the constant offset into the effective address, if any. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* The helpers take a host-word otag argument; zero-widen it on
      64-bit hosts. */
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   /* Select the store helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_store32);
               hName = "MC_(helperc_b_store32)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
6461
6462static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006463 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006464 if (eTy == Ity_I64)
6465 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6466 if (eTy == Ity_I32)
6467 return e;
6468 tl_assert(0);
6469}
6470
6471static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006472 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006473 tl_assert(eTy == Ity_I32);
6474 if (dstTy == Ity_I64)
6475 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6476 tl_assert(0);
6477}
6478
sewardjdb5907d2009-11-26 17:20:21 +00006479
/* Compute, as IR, the origin (a 32-bit B-value) of expression 'e'.
   The general scheme: an expression's origin is the maxU32 of the
   origins of its inputs, with special handling for loads, indexed
   gets, CCall argument masking and CAS comparison results.  Only
   reached when origin tracking is enabled (MC_(clo_mc_level) == 3).

   NOTE(review): the order in which sub-expressions are visited fixes
   the order of the emitted shadow IR; do not reorder the schemeE
   calls below. */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow register array lives at an offset of
            2*total_sizeB from the original guest state. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         /* Fold together the origins of all arguments not excluded by
            the callee's mc-exclusion mask. */
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         /* Origin of a load comes from the B-shadow of the loaded
            memory range. */
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         /* NOTE(review): iftrue (b3) is deliberately visited before
            iffalse (b2); keep this order so the emitted IR is
            unchanged. */
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* A unop's origin is simply its operand's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants carry no origin. */
         return mkU32(0);
      case Iex_RdTmp:
         /* Read the temporary's B-shadow. */
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* Map the guest-state offset into the B-shadow area; -1
            means the slot is untracked. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
6614
sewardjdb5907d2009-11-26 17:20:21 +00006615
sewardj7cf4e6b2008-05-01 20:24:26 +00006616static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6617{
6618 // This is a hacked version of do_shadow_Dirty
sewardj2eecb742012-06-01 16:11:41 +00006619 Int i, k, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00006620 IRAtom *here, *curr;
6621 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00006622
6623 /* First check the guard. */
6624 curr = schemeE( mce, d->guard );
6625
6626 /* Now round up all inputs and maxU32 over them. */
6627
florian434ffae2012-07-19 17:23:42 +00006628 /* Inputs: unmasked args
6629 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj7cf4e6b2008-05-01 20:24:26 +00006630 for (i = 0; d->args[i]; i++) {
6631 if (d->cee->mcx_mask & (1<<i)) {
6632 /* ignore this arg */
6633 } else {
6634 here = schemeE( mce, d->args[i] );
6635 curr = gen_maxU32( mce, curr, here );
6636 }
6637 }
6638
6639 /* Inputs: guest state that we read. */
6640 for (i = 0; i < d->nFxState; i++) {
6641 tl_assert(d->fxState[i].fx != Ifx_None);
6642 if (d->fxState[i].fx == Ifx_Write)
6643 continue;
6644
sewardj2eecb742012-06-01 16:11:41 +00006645 /* Enumerate the described state segments */
6646 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6647 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6648 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006649
sewardj2eecb742012-06-01 16:11:41 +00006650 /* Ignore any sections marked as 'always defined'. */
6651 if (isAlwaysDefd(mce, gOff, gSz)) {
6652 if (0)
6653 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6654 gOff, gSz);
6655 continue;
sewardj7cf4e6b2008-05-01 20:24:26 +00006656 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006657
sewardj2eecb742012-06-01 16:11:41 +00006658 /* This state element is read or modified. So we need to
6659 consider it. If larger than 4 bytes, deal with it in
6660 4-byte chunks. */
6661 while (True) {
6662 Int b_offset;
6663 tl_assert(gSz >= 0);
6664 if (gSz == 0) break;
6665 n = gSz <= 4 ? gSz : 4;
6666 /* update 'curr' with maxU32 of the state slice
6667 gOff .. gOff+n-1 */
6668 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6669 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006670 /* Observe the guard expression. If it is false use 0, i.e.
6671 nothing is known about the origin */
6672 IRAtom *cond, *iffalse, *iftrue;
6673
sewardjcc961652013-01-26 11:49:15 +00006674 cond = assignNew( 'B', mce, Ity_I1, d->guard);
florian434ffae2012-07-19 17:23:42 +00006675 iffalse = mkU32(0);
6676 iftrue = assignNew( 'B', mce, Ity_I32,
6677 IRExpr_Get(b_offset
6678 + 2*mce->layout->total_sizeB,
6679 Ity_I32));
6680 here = assignNew( 'B', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006681 IRExpr_ITE(cond, iftrue, iffalse));
sewardj2eecb742012-06-01 16:11:41 +00006682 curr = gen_maxU32( mce, curr, here );
6683 }
6684 gSz -= n;
6685 gOff += n;
6686 }
6687 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006688 }
6689
6690 /* Inputs: memory */
6691
6692 if (d->mFx != Ifx_None) {
6693 /* Because we may do multiple shadow loads/stores from the same
6694 base address, it's best to do a single test of its
6695 definedness right now. Post-instrumentation optimisation
6696 should remove all but this test. */
6697 tl_assert(d->mAddr);
6698 here = schemeE( mce, d->mAddr );
6699 curr = gen_maxU32( mce, curr, here );
6700 }
6701
6702 /* Deal with memory inputs (reads or modifies) */
6703 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006704 toDo = d->mSize;
6705 /* chew off 32-bit chunks. We don't care about the endianness
6706 since it's all going to be condensed down to a single bit,
6707 but nevertheless choose an endianness which is hopefully
6708 native to the platform. */
6709 while (toDo >= 4) {
florian434ffae2012-07-19 17:23:42 +00006710 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6711 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006712 curr = gen_maxU32( mce, curr, here );
6713 toDo -= 4;
6714 }
sewardj8c93fcc2008-10-30 13:08:31 +00006715 /* handle possible 16-bit excess */
6716 while (toDo >= 2) {
florian434ffae2012-07-19 17:23:42 +00006717 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6718 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006719 curr = gen_maxU32( mce, curr, here );
6720 toDo -= 2;
6721 }
floriancda994b2012-06-08 16:01:19 +00006722 /* chew off the remaining 8-bit chunk, if any */
6723 if (toDo == 1) {
florian434ffae2012-07-19 17:23:42 +00006724 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6725 d->guard );
floriancda994b2012-06-08 16:01:19 +00006726 curr = gen_maxU32( mce, curr, here );
6727 toDo -= 1;
6728 }
6729 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006730 }
6731
6732 /* Whew! So curr is a 32-bit B-value which should give an origin
6733 of some use if any of the inputs to the helper are undefined.
6734 Now we need to re-distribute the results to all destinations. */
6735
6736 /* Outputs: the destination temporary, if there is one. */
6737 if (d->tmp != IRTemp_INVALID) {
6738 dst = findShadowTmpB(mce, d->tmp);
6739 assign( 'V', mce, dst, curr );
6740 }
6741
6742 /* Outputs: guest state that we write or modify. */
6743 for (i = 0; i < d->nFxState; i++) {
6744 tl_assert(d->fxState[i].fx != Ifx_None);
6745 if (d->fxState[i].fx == Ifx_Read)
6746 continue;
6747
sewardj2eecb742012-06-01 16:11:41 +00006748 /* Enumerate the described state segments */
6749 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6750 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6751 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006752
sewardj2eecb742012-06-01 16:11:41 +00006753 /* Ignore any sections marked as 'always defined'. */
6754 if (isAlwaysDefd(mce, gOff, gSz))
6755 continue;
6756
6757 /* This state element is written or modified. So we need to
6758 consider it. If larger than 4 bytes, deal with it in
6759 4-byte chunks. */
6760 while (True) {
6761 Int b_offset;
6762 tl_assert(gSz >= 0);
6763 if (gSz == 0) break;
6764 n = gSz <= 4 ? gSz : 4;
6765 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6766 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6767 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006768
florian6c0aa2c2013-01-21 01:27:22 +00006769 /* If the guard expression evaluates to false we simply Put
6770 the value that is already stored in the guest state slot */
6771 IRAtom *cond, *iffalse;
6772
sewardjcc961652013-01-26 11:49:15 +00006773 cond = assignNew('B', mce, Ity_I1,
6774 d->guard);
florian6c0aa2c2013-01-21 01:27:22 +00006775 iffalse = assignNew('B', mce, Ity_I32,
6776 IRExpr_Get(b_offset +
6777 2*mce->layout->total_sizeB,
6778 Ity_I32));
6779 curr = assignNew('V', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006780 IRExpr_ITE(cond, curr, iffalse));
florian6c0aa2c2013-01-21 01:27:22 +00006781
sewardj2eecb742012-06-01 16:11:41 +00006782 stmt( 'B', mce, IRStmt_Put(b_offset
florian6c0aa2c2013-01-21 01:27:22 +00006783 + 2*mce->layout->total_sizeB,
sewardj2eecb742012-06-01 16:11:41 +00006784 curr ));
6785 }
6786 gSz -= n;
6787 gOff += n;
sewardj7cf4e6b2008-05-01 20:24:26 +00006788 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006789 }
6790 }
6791
6792 /* Outputs: memory that we write or modify. Same comments about
6793 endianness as above apply. */
6794 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006795 toDo = d->mSize;
6796 /* chew off 32-bit chunks */
6797 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006798 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006799 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006800 toDo -= 4;
6801 }
sewardj8c93fcc2008-10-30 13:08:31 +00006802 /* handle possible 16-bit excess */
6803 while (toDo >= 2) {
sewardjcafe5052013-01-17 14:24:35 +00006804 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6805 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006806 toDo -= 2;
6807 }
floriancda994b2012-06-08 16:01:19 +00006808 /* chew off the remaining 8-bit chunk, if any */
6809 if (toDo == 1) {
6810 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006811 d->guard );
floriancda994b2012-06-08 16:01:19 +00006812 toDo -= 1;
6813 }
6814 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006815 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006816}
6817
sewardjdb5907d2009-11-26 17:20:21 +00006818
sewardjcafe5052013-01-17 14:24:35 +00006819/* Generate IR for origin shadowing for a general guarded store. */
6820static void do_origins_Store_guarded ( MCEnv* mce,
6821 IREndness stEnd,
6822 IRExpr* stAddr,
6823 IRExpr* stData,
6824 IRExpr* guard )
sewardjdb5907d2009-11-26 17:20:21 +00006825{
6826 Int dszB;
6827 IRAtom* dataB;
6828 /* assert that the B value for the address is already available
6829 (somewhere), since the call to schemeE will want to see it.
6830 XXXX how does this actually ensure that?? */
6831 tl_assert(isIRAtom(stAddr));
6832 tl_assert(isIRAtom(stData));
6833 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6834 dataB = schemeE( mce, stData );
sewardjcafe5052013-01-17 14:24:35 +00006835 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
6836}
6837
6838
/* Generate IR for origin shadowing for a plain (unconditional)
   store: simply the guarded case with a NULL guard. */
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}
6848
6849
6850/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6851
/* Origin shadowing for an explicit guarded-store (StoreG) statement:
   unpack the IRStoreG fields and defer to the general guarded-store
   handler. */
static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}
6857
6858static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
6859{
6860 IRType loadedTy = Ity_INVALID;
6861 switch (lg->cvt) {
6862 case ILGop_Ident32: loadedTy = Ity_I32; break;
6863 case ILGop_16Uto32: loadedTy = Ity_I16; break;
6864 case ILGop_16Sto32: loadedTy = Ity_I16; break;
6865 case ILGop_8Uto32: loadedTy = Ity_I8; break;
6866 case ILGop_8Sto32: loadedTy = Ity_I8; break;
6867 default: VG_(tool_panic)("schemeS.IRLoadG");
6868 }
6869 IRAtom* ori_alt
6870 = schemeE( mce,lg->alt );
6871 IRAtom* ori_final
6872 = expr2ori_Load_guarded_General(mce, loadedTy,
6873 lg->addr, 0/*addr bias*/,
6874 lg->guard, ori_alt );
6875 /* And finally, bind the origin to the destination temporary. */
6876 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
sewardjdb5907d2009-11-26 17:20:21 +00006877}
6878
6879
/* Generate origin-tracking IR for statement 'st': the B-value
   counterpart of the main value instrumenter.  Dispatches on the
   statement tag; each writing statement gets a parallel write of the
   appropriate origin into the B-shadow state.  Only reached when
   origin tracking is enabled (MC_(clo_mc_level) == 3). */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* B-shadow register array: offset by 2*total_sizeB, as in
            schemeE's Iex_GetI case. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                      st->Ist.Store.addr,
                                      st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* Write the data's origin into the corresponding B-shadow
            guest-state slot, if this offset is tracked at all. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No data flow to track for these. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
7001
7002
njn25e49d8e72002-09-23 09:36:25 +00007003/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00007004/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00007005/*--------------------------------------------------------------------*/