blob: 0916bf1bd3f868609253a6bd63f04d954ce16257 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj0f157dd2013-10-18 14:27:36 +000011 Copyright (C) 2000-2013 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
 106      historical reasons. [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardjb9e6d242013-05-11 13:42:08 +0000122/* Generation of addr-definedness, addr-validity and
123 guard-definedness checks pertaining to loads and stores (Iex_Load,
124 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
125 loads/stores) was re-checked 11 May 2013. */
126
sewardj95448072004-11-22 20:19:51 +0000127/*------------------------------------------------------------*/
128/*--- Forward decls ---*/
129/*------------------------------------------------------------*/
130
131struct _MCEnv;
132
sewardj7cf4e6b2008-05-01 20:24:26 +0000133static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000134static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000135static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000136
sewardjb5b87402011-03-07 16:05:35 +0000137static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000138
139/*------------------------------------------------------------*/
140/*--- Memcheck running state, and tmp management. ---*/
141/*------------------------------------------------------------*/
142
sewardj1c0ce7a2009-07-01 08:10:49 +0000143/* Carries info about a particular tmp. The tmp's number is not
144 recorded, as this is implied by (equal to) its index in the tmpMap
145 in MCEnv. The tmp's type is also not recorded, as this is present
146 in MCEnv.sb->tyenv.
147
148 When .kind is Orig, .shadowV and .shadowB may give the identities
149 of the temps currently holding the associated definedness (shadowV)
150 and origin (shadowB) values, or these may be IRTemp_INVALID if code
151 to compute such values has not yet been emitted.
152
153 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
154 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
155 illogical for a shadow tmp itself to be shadowed.
156*/
157typedef
158 enum { Orig=1, VSh=2, BSh=3 }
159 TempKind;
160
161typedef
162 struct {
163 TempKind kind;
164 IRTemp shadowV;
165 IRTemp shadowB;
166 }
167 TempMapEnt;
168
169
sewardj95448072004-11-22 20:19:51 +0000170/* Carries around state during memcheck instrumentation. */
171typedef
172 struct _MCEnv {
sewardj0b9d74a2006-12-24 02:24:11 +0000173 /* MODIFIED: the superblock being constructed. IRStmts are
174 added. */
sewardj1c0ce7a2009-07-01 08:10:49 +0000175 IRSB* sb;
sewardj7cf4e6b2008-05-01 20:24:26 +0000176 Bool trace;
sewardj95448072004-11-22 20:19:51 +0000177
sewardj1c0ce7a2009-07-01 08:10:49 +0000178 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
179 current kind and possibly shadow temps for each temp in the
180 IRSB being constructed. Note that it does not contain the
181 type of each tmp. If you want to know the type, look at the
182 relevant entry in sb->tyenv. It follows that at all times
183 during the instrumentation process, the valid indices for
184 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
185 total number of Orig, V- and B- temps allocated so far.
186
187 The reason for this strange split (types in one place, all
188 other info in another) is that we need the types to be
189 attached to sb so as to make it possible to do
190 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
191 instrumentation process. */
192 XArray* /* of TempMapEnt */ tmpMap;
sewardj95448072004-11-22 20:19:51 +0000193
sewardjd5204dc2004-12-31 01:16:11 +0000194 /* MODIFIED: indicates whether "bogus" literals have so far been
195 found. Starts off False, and may change to True. */
sewardj54eac252012-03-27 10:19:39 +0000196 Bool bogusLiterals;
197
198 /* READONLY: indicates whether we should use expensive
199 interpretations of integer adds, since unfortunately LLVM
200 uses them to do ORs in some circumstances. Defaulted to True
201 on MacOS and False everywhere else. */
202 Bool useLLVMworkarounds;
sewardjd5204dc2004-12-31 01:16:11 +0000203
sewardj95448072004-11-22 20:19:51 +0000204 /* READONLY: the guest layout. This indicates which parts of
205 the guest state should be regarded as 'always defined'. */
206 VexGuestLayout* layout;
sewardj634ba772006-10-15 12:47:37 +0000207
sewardj95448072004-11-22 20:19:51 +0000208 /* READONLY: the host word type. Needed for constructing
209 arguments of type 'HWord' to be passed to helper functions.
210 Ity_I32 or Ity_I64 only. */
211 IRType hWordTy;
212 }
213 MCEnv;
214
215/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
216 demand), as they are encountered. This is for two reasons.
217
218 (1) (less important reason): Many original tmps are unused due to
 219    initial IR optimisation, and we do not want to waste space in
 220    tables tracking them.
221
222 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
223 table indexed [0 .. n_types-1], which gives the current shadow for
224 each original tmp, or INVALID_IRTEMP if none is so far assigned.
225 It is necessary to support making multiple assignments to a shadow
226 -- specifically, after testing a shadow for definedness, it needs
227 to be made defined. But IR's SSA property disallows this.
228
229 (2) (more important reason): Therefore, when a shadow needs to get
230 a new value, a new temporary is created, the value is assigned to
231 that, and the tmpMap is updated to reflect the new binding.
232
233 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000234 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000235 there's a read-before-write error in the original tmps. The IR
236 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000237*/
sewardj95448072004-11-22 20:19:51 +0000238
sewardj1c0ce7a2009-07-01 08:10:49 +0000239/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
240 both the table in mce->sb and to our auxiliary mapping. Note that
241 newTemp may cause mce->tmpMap to resize, hence previous results
242 from VG_(indexXA)(mce->tmpMap) are invalidated. */
243static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
244{
245 Word newIx;
246 TempMapEnt ent;
247 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
248 ent.kind = kind;
249 ent.shadowV = IRTemp_INVALID;
250 ent.shadowB = IRTemp_INVALID;
251 newIx = VG_(addToXA)( mce->tmpMap, &ent );
252 tl_assert(newIx == (Word)tmp);
253 return tmp;
254}
255
256
sewardj95448072004-11-22 20:19:51 +0000257/* Find the tmp currently shadowing the given original tmp. If none
258 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000259static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000260{
sewardj1c0ce7a2009-07-01 08:10:49 +0000261 TempMapEnt* ent;
262 /* VG_(indexXA) range-checks 'orig', hence no need to check
263 here. */
264 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
265 tl_assert(ent->kind == Orig);
266 if (ent->shadowV == IRTemp_INVALID) {
267 IRTemp tmpV
268 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
269 /* newTemp may cause mce->tmpMap to resize, hence previous results
270 from VG_(indexXA) are invalid. */
271 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
272 tl_assert(ent->kind == Orig);
273 tl_assert(ent->shadowV == IRTemp_INVALID);
274 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000275 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000276 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000277}
278
sewardj95448072004-11-22 20:19:51 +0000279/* Allocate a new shadow for the given original tmp. This means any
280 previous shadow is abandoned. This is needed because it is
281 necessary to give a new value to a shadow once it has been tested
282 for undefinedness, but unfortunately IR's SSA property disallows
283 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000284 and use that instead.
285
286 This is the same as findShadowTmpV, except we don't bother to see
287 if a shadow temp already existed -- we simply allocate a new one
288 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000289static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000290{
sewardj1c0ce7a2009-07-01 08:10:49 +0000291 TempMapEnt* ent;
292 /* VG_(indexXA) range-checks 'orig', hence no need to check
293 here. */
294 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
295 tl_assert(ent->kind == Orig);
296 if (1) {
297 IRTemp tmpV
298 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
299 /* newTemp may cause mce->tmpMap to resize, hence previous results
300 from VG_(indexXA) are invalid. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 ent->shadowV = tmpV;
304 }
sewardj95448072004-11-22 20:19:51 +0000305}
306
307
308/*------------------------------------------------------------*/
309/*--- IRAtoms -- a subset of IRExprs ---*/
310/*------------------------------------------------------------*/
311
312/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000313 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000314 input, most of this code deals in atoms. Usefully, a value atom
315 always has a V-value which is also an atom: constants are shadowed
316 by constants, and temps are shadowed by the corresponding shadow
317 temporary. */
318
319typedef IRExpr IRAtom;
320
321/* (used for sanity checks only): is this an atom which looks
322 like it's from original code? */
323static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
324{
325 if (a1->tag == Iex_Const)
326 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000327 if (a1->tag == Iex_RdTmp) {
328 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
329 return ent->kind == Orig;
330 }
sewardj95448072004-11-22 20:19:51 +0000331 return False;
332}
333
334/* (used for sanity checks only): is this an atom which looks
335 like it's from shadow code? */
336static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
337{
338 if (a1->tag == Iex_Const)
339 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000340 if (a1->tag == Iex_RdTmp) {
341 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
342 return ent->kind == VSh || ent->kind == BSh;
343 }
sewardj95448072004-11-22 20:19:51 +0000344 return False;
345}
346
347/* (used for sanity checks only): check that both args are atoms and
348 are identically-kinded. */
349static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
350{
sewardj0b9d74a2006-12-24 02:24:11 +0000351 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000352 return True;
sewardjbef552a2005-08-30 12:54:36 +0000353 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000354 return True;
355 return False;
356}
357
358
359/*------------------------------------------------------------*/
360/*--- Type management ---*/
361/*------------------------------------------------------------*/
362
363/* Shadow state is always accessed using integer types. This returns
364 an integer type with the same size (as per sizeofIRType) as the
365 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000366 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000367
sewardj7cf4e6b2008-05-01 20:24:26 +0000368static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000369{
370 switch (ty) {
371 case Ity_I1:
372 case Ity_I8:
373 case Ity_I16:
374 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000375 case Ity_I64:
376 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000377 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000378 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000379 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000380 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000381 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000382 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000383 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000384 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000385 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000386 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000387 }
388}
389
390/* Produce a 'defined' value of the given shadow type. Should only be
 391   supplied shadow types (I1/I8/I16/I32/I64/I128/V128/V256). */
392static IRExpr* definedOfType ( IRType ty ) {
393 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000394 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
395 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
396 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
397 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
398 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000399 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000400 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardj1eb272f2014-01-26 18:36:52 +0000401 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
sewardjf1962d32006-10-19 13:22:16 +0000402 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000403 }
404}
405
406
sewardj95448072004-11-22 20:19:51 +0000407/*------------------------------------------------------------*/
408/*--- Constructing IR fragments ---*/
409/*------------------------------------------------------------*/
410
sewardj95448072004-11-22 20:19:51 +0000411/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000412static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
413 if (mce->trace) {
414 VG_(printf)(" %c: ", cat);
415 ppIRStmt(st);
416 VG_(printf)("\n");
417 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000418 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000419}
420
421/* assign value to tmp */
422static inline
423void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000424 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000425}
sewardj95448072004-11-22 20:19:51 +0000426
427/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000428#define triop(_op, _arg1, _arg2, _arg3) \
429 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000430#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
431#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
sewardjcc961652013-01-26 11:49:15 +0000432#define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
sewardj95448072004-11-22 20:19:51 +0000433#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
434#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
435#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
436#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000437#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000438#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000439
sewardj7cf4e6b2008-05-01 20:24:26 +0000440/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000441 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000442 an atom.
443
444 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000445 needs to be. But passing it in is redundant, since we can deduce
446 the type merely by inspecting 'e'. So at least use that fact to
447 assert that the two types agree. */
448static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
449{
450 TempKind k;
451 IRTemp t;
452 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000453
sewardj7cf4e6b2008-05-01 20:24:26 +0000454 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000455 switch (cat) {
456 case 'V': k = VSh; break;
457 case 'B': k = BSh; break;
458 case 'C': k = Orig; break;
459 /* happens when we are making up new "orig"
460 expressions, for IRCAS handling */
461 default: tl_assert(0);
462 }
463 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000464 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000465 return mkexpr(t);
466}
467
468
469/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000470/*--- Helper functions for 128-bit ops ---*/
471/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000472
sewardjb5b87402011-03-07 16:05:35 +0000473static IRExpr *i128_const_zero(void)
474{
sewardj45fa9f42012-05-21 10:18:10 +0000475 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
476 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000477}
478
sewardj45fa9f42012-05-21 10:18:10 +0000479/* There are no I128-bit loads and/or stores [as generated by any
480 current front ends]. So we do not need to worry about that in
481 expr2vbits_Load */
482
sewardjb5b87402011-03-07 16:05:35 +0000483
484/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000485/*--- Constructing definedness primitive ops ---*/
486/*------------------------------------------------------------*/
487
488/* --------- Defined-if-either-defined --------- */
489
490static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
491 tl_assert(isShadowAtom(mce,a1));
492 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000493 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000494}
495
496static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
497 tl_assert(isShadowAtom(mce,a1));
498 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000499 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000500}
501
502static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
503 tl_assert(isShadowAtom(mce,a1));
504 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000505 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000506}
507
sewardj7010f6e2004-12-10 13:35:22 +0000508static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
509 tl_assert(isShadowAtom(mce,a1));
510 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000511 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000512}
513
sewardj20d38f22005-02-07 23:50:18 +0000514static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000515 tl_assert(isShadowAtom(mce,a1));
516 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000517 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000518}
519
sewardj350e8f72012-06-25 07:52:15 +0000520static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
521 tl_assert(isShadowAtom(mce,a1));
522 tl_assert(isShadowAtom(mce,a2));
523 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
524}
525
sewardj95448072004-11-22 20:19:51 +0000526/* --------- Undefined-if-either-undefined --------- */
527
528static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
529 tl_assert(isShadowAtom(mce,a1));
530 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000531 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000532}
533
534static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
535 tl_assert(isShadowAtom(mce,a1));
536 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000537 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000538}
539
540static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
541 tl_assert(isShadowAtom(mce,a1));
542 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000543 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000544}
545
546static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
547 tl_assert(isShadowAtom(mce,a1));
548 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000549 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000550}
551
sewardjb5b87402011-03-07 16:05:35 +0000552static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
553 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
554 tl_assert(isShadowAtom(mce,a1));
555 tl_assert(isShadowAtom(mce,a2));
556 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
557 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
558 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
559 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
560 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
561 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
562
563 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
564}
565
sewardj20d38f22005-02-07 23:50:18 +0000566static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000567 tl_assert(isShadowAtom(mce,a1));
568 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000569 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000570}
571
sewardj350e8f72012-06-25 07:52:15 +0000572static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
573 tl_assert(isShadowAtom(mce,a1));
574 tl_assert(isShadowAtom(mce,a2));
575 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
576}
577
sewardje50a1b12004-12-17 01:24:54 +0000578static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000579 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000580 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000581 case Ity_I16: return mkUifU16(mce, a1, a2);
582 case Ity_I32: return mkUifU32(mce, a1, a2);
583 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000584 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000585 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardja2f30952013-03-27 11:40:02 +0000586 case Ity_V256: return mkUifUV256(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000587 default:
588 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
589 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000590 }
591}
592
sewardj95448072004-11-22 20:19:51 +0000593/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000594
sewardj95448072004-11-22 20:19:51 +0000595static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
596 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000597 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000598}
599
600static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
601 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000602 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000603}
604
605static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
606 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000607 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000608}
609
sewardj681be302005-01-15 20:43:58 +0000610static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
611 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000612 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000613}
614
sewardj95448072004-11-22 20:19:51 +0000615/* --------- 'Improvement' functions for AND/OR. --------- */
616
617/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
618 defined (0); all other -> undefined (1).
619*/
620static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000621{
sewardj95448072004-11-22 20:19:51 +0000622 tl_assert(isOriginalAtom(mce, data));
623 tl_assert(isShadowAtom(mce, vbits));
624 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000625 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000626}
njn25e49d8e72002-09-23 09:36:25 +0000627
sewardj95448072004-11-22 20:19:51 +0000628static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000634}
njn25e49d8e72002-09-23 09:36:25 +0000635
sewardj95448072004-11-22 20:19:51 +0000636static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000641 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000642}
njn25e49d8e72002-09-23 09:36:25 +0000643
sewardj7010f6e2004-12-10 13:35:22 +0000644static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645{
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000649 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000650}
651
sewardj20d38f22005-02-07 23:50:18 +0000652static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000657 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000658}
659
sewardj350e8f72012-06-25 07:52:15 +0000660static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
661{
662 tl_assert(isOriginalAtom(mce, data));
663 tl_assert(isShadowAtom(mce, vbits));
664 tl_assert(sameKindedAtoms(data, vbits));
665 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
666}
667
sewardj95448072004-11-22 20:19:51 +0000668/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
669 defined (0); all other -> undefined (1).
670*/
671static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
672{
673 tl_assert(isOriginalAtom(mce, data));
674 tl_assert(isShadowAtom(mce, vbits));
675 tl_assert(sameKindedAtoms(data, vbits));
676 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000677 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000678 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000679 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000680 vbits) );
681}
njn25e49d8e72002-09-23 09:36:25 +0000682
sewardj95448072004-11-22 20:19:51 +0000683static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
684{
685 tl_assert(isOriginalAtom(mce, data));
686 tl_assert(isShadowAtom(mce, vbits));
687 tl_assert(sameKindedAtoms(data, vbits));
688 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000689 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000690 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000691 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000692 vbits) );
693}
njn25e49d8e72002-09-23 09:36:25 +0000694
sewardj95448072004-11-22 20:19:51 +0000695static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
696{
697 tl_assert(isOriginalAtom(mce, data));
698 tl_assert(isShadowAtom(mce, vbits));
699 tl_assert(sameKindedAtoms(data, vbits));
700 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000701 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000702 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000703 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000704 vbits) );
705}
706
sewardj7010f6e2004-12-10 13:35:22 +0000707static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
708{
709 tl_assert(isOriginalAtom(mce, data));
710 tl_assert(isShadowAtom(mce, vbits));
711 tl_assert(sameKindedAtoms(data, vbits));
712 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000713 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000714 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000715 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000716 vbits) );
717}
718
sewardj20d38f22005-02-07 23:50:18 +0000719static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000720{
721 tl_assert(isOriginalAtom(mce, data));
722 tl_assert(isShadowAtom(mce, vbits));
723 tl_assert(sameKindedAtoms(data, vbits));
724 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000725 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000726 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000727 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000728 vbits) );
729}
730
sewardj350e8f72012-06-25 07:52:15 +0000731static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
732{
733 tl_assert(isOriginalAtom(mce, data));
734 tl_assert(isShadowAtom(mce, vbits));
735 tl_assert(sameKindedAtoms(data, vbits));
736 return assignNew(
737 'V', mce, Ity_V256,
738 binop(Iop_OrV256,
739 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
740 vbits) );
741}
742
sewardj95448072004-11-22 20:19:51 +0000743/* --------- Pessimising casts. --------- */
744
sewardjb5b87402011-03-07 16:05:35 +0000745/* The function returns an expression of type DST_TY. If any of the VBITS
746 is undefined (value == 1) the resulting expression has all bits set to
747 1. Otherwise, all bits are 0. */
748
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   /* Same-width cases: CmpwNEZ* is a one-op PCast (0 -> 0, anything
      else -> all 1s), so no collapse/widen pair is needed. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit.
      tmp1 :: Ity_I1, and is 1 iff any vbit is 1 (undefined). */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type.  1Sto* sign-extends the single bit,
      yielding all-0s (defined) or all-1s (undefined); the 64HLto*
      pairings splice copies together for the wider types. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
859
sewardjd5204dc2004-12-31 01:16:11 +0000860/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
861/*
862 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
863 PCasting to Ity_U1. However, sometimes it is necessary to be more
864 accurate. The insight is that the result is defined if two
865 corresponding bits can be found, one from each argument, so that
866 both bits are defined but are different -- that makes EQ say "No"
867 and NE say "Yes". Hence, we compute an improvement term and DifD
868 it onto the "normal" (UifU) result.
869
870 The result is:
871
872 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000873 -- naive version
874 PCastTo<sz>( UifU<sz>(vxx, vyy) )
875
sewardjd5204dc2004-12-31 01:16:11 +0000876 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000877
878 -- improvement term
879 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000880 )
sewardje6f8af42005-07-06 18:48:59 +0000881
sewardjd5204dc2004-12-31 01:16:11 +0000882 where
883 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000884 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000885
sewardje6f8af42005-07-06 18:48:59 +0000886 vec = Or<sz>( vxx, // 0 iff bit defined
887 vyy, // 0 iff bit defined
888 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
889 )
890
891 If any bit of vec is 0, the result is defined and so the
892 improvement term should produce 0...0, else it should produce
893 1...1.
894
895 Hence require for the improvement term:
896
897 if vec == 1...1 then 1...1 else 0...0
898 ->
899 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
900
901 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000902*/
/* See the long comment above for the derivation of this scheme.
   Returns an Ity_I1 shadow value for a CmpEQ/CmpNE of xx vs yy,
   given their shadows vxx/vyy.  ty is the (integer) type of the
   operands: I16, I32 or I64. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));
 
   /* Pick the width-specific ops.  DifD is And (0 wins: defined),
      UifU is Or (1 wins: undefined); 'top' is the all-1s constant. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCastTo<sz>( UifU<sz>(vxx, vyy) ) */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec = vxx | vyy | ~(xx ^ yy); a 0 bit in vec marks a position
      where both args are defined and differ. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = PCast<sz>( CmpEQ<sz>(vec, 1...1) ): all-0s
      (defined) iff some bit of vec is 0. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* improved = naive `DifD` improvement_term */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Narrow to the Ity_I1 shadow of the comparison result. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
977
sewardj95448072004-11-22 20:19:51 +0000978
sewardj992dff92005-10-07 11:08:55 +0000979/* --------- Semi-accurate interpretation of CmpORD. --------- */
980
981/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
982
983 CmpORD32S(x,y) = 1<<3 if x <s y
984 = 1<<2 if x >s y
985 = 1<<1 if x == y
986
987 and similarly the unsigned variant. The default interpretation is:
988
989 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000990 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000991
992 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
993 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000994
995 Also deal with a special case better:
996
997 CmpORD32S(x,0)
998
999 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1000 will be defined even if the rest of x isn't. In which case we do:
1001
1002 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +00001003 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1004 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +00001005
sewardj1bc82102005-12-23 00:16:24 +00001006 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +00001007*/
sewardja9e62a92005-10-07 12:13:21 +00001008static Bool isZeroU32 ( IRAtom* e )
1009{
1010 return
1011 toBool( e->tag == Iex_Const
1012 && e->Iex.Const.con->tag == Ico_U32
1013 && e->Iex.Const.con->Ico.U32 == 0 );
1014}
1015
sewardj1bc82102005-12-23 00:16:24 +00001016static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +00001017{
sewardj1bc82102005-12-23 00:16:24 +00001018 return
1019 toBool( e->tag == Iex_Const
1020 && e->Iex.Const.con->tag == Ico_U64
1021 && e->Iex.Const.con->Ico.U64 == 0 );
1022}
1023
/* Compute a shadow value for CmpORD{32,64}{S,U} as described in the
   comment above.  xxhash/yyhash are the shadows of xx/yy. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(xxhash) & (3<<1))        -- GT#,EQ# from the whole of xx#
         | ((xxhash >>u (width-1)) << 3) -- LT# is just the sign bit's
                                            definedness */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* PCast(xx# `UifU` yy#) & (7<<1): bits other than 3,2,1 are
         always zero, hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1094
1095
sewardj95448072004-11-22 20:19:51 +00001096/*------------------------------------------------------------*/
1097/*--- Emit a test and complaint if something is undefined. ---*/
1098/*------------------------------------------------------------*/
1099
sewardj7cf4e6b2008-05-01 20:24:26 +00001100static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1101
1102
sewardj95448072004-11-22 20:19:51 +00001103/* Set the annotations on a dirty helper to indicate that the stack
1104 pointer and instruction pointers might be read. This is the
1105 behaviour of all 'emit-a-complaint' style functions we might
1106 call. */
1107
1108static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1109 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001110 di->fxState[0].fx = Ifx_Read;
1111 di->fxState[0].offset = mce->layout->offset_SP;
1112 di->fxState[0].size = mce->layout->sizeof_SP;
1113 di->fxState[0].nRepeats = 0;
1114 di->fxState[0].repeatLen = 0;
1115 di->fxState[1].fx = Ifx_Read;
1116 di->fxState[1].offset = mce->layout->offset_IP;
1117 di->fxState[1].size = mce->layout->sizeof_IP;
1118 di->fxState[1].nRepeats = 0;
1119 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001120}
1121
1122
sewardjcafe5052013-01-17 14:24:35 +00001123/* Check the supplied *original* |atom| for undefinedness, and emit a
sewardj95448072004-11-22 20:19:51 +00001124 complaint if so. Once that happens, mark it as defined. This is
1125 possible because the atom is either a tmp or literal. If it's a
1126 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1127 be defined. In fact as mentioned above, we will have to allocate a
1128 new tmp to carry the new 'defined' shadow value, and update the
1129 original->tmp mapping accordingly; we cannot simply assign a new
sewardjcafe5052013-01-17 14:24:35 +00001130 value to an existing shadow tmp as this breaks SSAness.
1131
sewardjb9e6d242013-05-11 13:42:08 +00001132 The checks are performed, any resulting complaint emitted, and
1133 |atom|'s shadow temp set to 'defined', ONLY in the case that
1134 |guard| evaluates to True at run-time. If it evaluates to False
1135 then no action is performed. If |guard| is NULL (the usual case)
1136 then it is assumed to be always-true, and hence these actions are
1137 performed unconditionally.
1138
1139 This routine does not generate code to check the definedness of
1140 |guard|. The caller is assumed to have taken care of that already.
sewardj95448072004-11-22 20:19:51 +00001141*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32 bits; widen for a 64-bit host word. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by the size (in bytes) of the value
      being checked.  Sizes 0 (Ity_I1), 1, 4 and 8 have dedicated
      helpers; 2 and 16 go via the generic size-parameterised one.
      The _w_o variants additionally receive the origin. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      /* Ity_I1 has no And op, so widen both to I32, And, narrow. */
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1318
1319
1320/*------------------------------------------------------------*/
1321/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1322/*------------------------------------------------------------*/
1323
1324/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1326 partially fall into such a region: (offset,size) should either be
1327 completely in such a region or completely not-in such a region.
1328*/
1329static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1330{
1331 Int minoffD, maxoffD, i;
1332 Int minoff = offset;
1333 Int maxoff = minoff + size - 1;
1334 tl_assert((minoff & ~0xFFFF) == 0);
1335 tl_assert((maxoff & ~0xFFFF) == 0);
1336
1337 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1338 minoffD = mce->layout->alwaysDefd[i].offset;
1339 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1340 tl_assert((minoffD & ~0xFFFF) == 0);
1341 tl_assert((maxoffD & ~0xFFFF) == 0);
1342
1343 if (maxoff < minoffD || maxoffD < minoff)
1344 continue; /* no overlap */
1345 if (minoff >= minoffD && maxoff <= maxoffD)
1346 return True; /* completely contained in an always-defd section */
1347
1348 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1349 }
1350 return False; /* could not find any containing section */
1351}
1352
1353
1354/* Generate into bb suitable actions to shadow this Put. If the state
1355 slice is marked 'always defined', do nothing. Otherwise, write the
1356 supplied V bits to the shadow state. We can pass in either an
1357 original atom or a V-atom, but not both. In the former case the
1358 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001359 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001360*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom (original) and vatom (shadow) is supplied;
      in the former case, derive the shadow here. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put.  The shadow state lives at the guest
         state offset displaced by layout->total_sizeB. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1404
1405
/* Generate into bb suitable actions to shadow this PutI (an indexed
   Put into a guest-state array); the indexed-state counterpart of
   do_shadow_PUT above.
*/
1409static
floriand39b0222012-05-31 15:48:13 +00001410void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001411{
sewardj7cf97ee2004-11-28 14:25:01 +00001412 IRAtom* vatom;
1413 IRType ty, tyS;
1414 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001415 IRRegArray* descr = puti->descr;
1416 IRAtom* ix = puti->ix;
1417 Int bias = puti->bias;
1418 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001419
njn1d0825f2006-03-27 11:37:07 +00001420 // Don't do shadow PUTIs if we're not doing undefined value checking.
1421 // Their absence lets Vex's optimiser remove all the shadow computation
1422 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001423 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001424 return;
1425
sewardj95448072004-11-22 20:19:51 +00001426 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001427 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001428 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001429 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001430 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001431 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001432 tl_assert(ty != Ity_I1);
1433 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001434 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001435 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1436 /* later: no ... */
1437 /* emit code to emit a complaint if any of the vbits are 1. */
1438 /* complainIfUndefined(mce, atom); */
1439 } else {
1440 /* Do a cloned version of the Put that refers to the shadow
1441 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001442 IRRegArray* new_descr
1443 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1444 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001445 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001446 }
1447}
1448
1449
1450/* Return an expression which contains the V bits corresponding to the
1451 given GET (passed in in pieces).
1452*/
1453static
1454IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1455{
sewardj7cf4e6b2008-05-01 20:24:26 +00001456 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001457 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001458 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001459 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1460 /* Always defined, return all zeroes of the relevant type */
1461 return definedOfType(tyS);
1462 } else {
1463 /* return a cloned version of the Get that refers to the shadow
1464 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001465 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001466 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1467 }
1468}
1469
1470
1471/* Return an expression which contains the V bits corresponding to the
1472 given GETI (passed in in pieces).
1473*/
1474static
sewardj0b9d74a2006-12-24 02:24:11 +00001475IRExpr* shadow_GETI ( MCEnv* mce,
1476 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001477{
1478 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001479 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001480 Int arrSize = descr->nElems * sizeofIRType(ty);
1481 tl_assert(ty != Ity_I1);
1482 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001483 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001484 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1485 /* Always defined, return all zeroes of the relevant type */
1486 return definedOfType(tyS);
1487 } else {
1488 /* return a cloned version of the Get that refers to the shadow
1489 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001490 IRRegArray* new_descr
1491 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1492 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001493 return IRExpr_GetI( new_descr, ix, bias );
1494 }
1495}
1496
1497
1498/*------------------------------------------------------------*/
1499/*--- Generating approximations for unknown operations, ---*/
1500/*--- using lazy-propagate semantics ---*/
1501/*------------------------------------------------------------*/
1502
1503/* Lazy propagation of undefinedness from two values, resulting in the
1504 specified shadow type.
1505*/
1506static
1507IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1508{
sewardj95448072004-11-22 20:19:51 +00001509 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001510 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1511 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001512 tl_assert(isShadowAtom(mce,va1));
1513 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001514
1515 /* The general case is inefficient because PCast is an expensive
1516 operation. Here are some special cases which use PCast only
1517 once rather than twice. */
1518
1519 /* I64 x I64 -> I64 */
1520 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1521 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1522 at = mkUifU(mce, Ity_I64, va1, va2);
1523 at = mkPCastTo(mce, Ity_I64, at);
1524 return at;
1525 }
1526
1527 /* I64 x I64 -> I32 */
1528 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1529 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1530 at = mkUifU(mce, Ity_I64, va1, va2);
1531 at = mkPCastTo(mce, Ity_I32, at);
1532 return at;
1533 }
1534
1535 if (0) {
1536 VG_(printf)("mkLazy2 ");
1537 ppIRType(t1);
1538 VG_(printf)("_");
1539 ppIRType(t2);
1540 VG_(printf)("_");
1541 ppIRType(finalVty);
1542 VG_(printf)("\n");
1543 }
1544
1545 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001546 at = mkPCastTo(mce, Ity_I32, va1);
1547 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1548 at = mkPCastTo(mce, finalVty, at);
1549 return at;
1550}
1551
1552
sewardjed69fdb2006-02-03 16:12:27 +00001553/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* No special case applies: report the signature before asserting,
      so the missing case can be added. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case (currently disabled): force everything via 32-bit
      intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1685
1686
sewardje91cea72006-02-08 19:32:02 +00001687/* 4-arg version of the above. */
1688static
1689IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1690 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1691{
1692 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001693 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1694 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1695 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1696 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
sewardje91cea72006-02-08 19:32:02 +00001697 tl_assert(isShadowAtom(mce,va1));
1698 tl_assert(isShadowAtom(mce,va2));
1699 tl_assert(isShadowAtom(mce,va3));
1700 tl_assert(isShadowAtom(mce,va4));
1701
1702 /* The general case is inefficient because PCast is an expensive
1703 operation. Here are some special cases which use PCast only
1704 twice rather than three times. */
1705
1706 /* I32 x I64 x I64 x I64 -> I64 */
1707 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1708 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1709 && finalVty == Ity_I64) {
1710 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1711 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1712 mode indication which is fully defined, this should get
1713 folded out later. */
1714 at = mkPCastTo(mce, Ity_I64, va1);
1715 /* Now fold in 2nd, 3rd, 4th args. */
1716 at = mkUifU(mce, Ity_I64, at, va2);
1717 at = mkUifU(mce, Ity_I64, at, va3);
1718 at = mkUifU(mce, Ity_I64, at, va4);
1719 /* and PCast once again. */
1720 at = mkPCastTo(mce, Ity_I64, at);
1721 return at;
1722 }
sewardjb5b87402011-03-07 16:05:35 +00001723 /* I32 x I32 x I32 x I32 -> I32 */
1724 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1725 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1726 && finalVty == Ity_I32) {
1727 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1728 at = va1;
1729 /* Now fold in 2nd, 3rd, 4th args. */
1730 at = mkUifU(mce, Ity_I32, at, va2);
1731 at = mkUifU(mce, Ity_I32, at, va3);
1732 at = mkUifU(mce, Ity_I32, at, va4);
1733 at = mkPCastTo(mce, Ity_I32, at);
1734 return at;
1735 }
sewardje91cea72006-02-08 19:32:02 +00001736
1737 if (1) {
sewardj453e8f82006-02-09 03:25:06 +00001738 VG_(printf)("mkLazy4: ");
sewardje91cea72006-02-08 19:32:02 +00001739 ppIRType(t1);
1740 VG_(printf)(" x ");
1741 ppIRType(t2);
1742 VG_(printf)(" x ");
1743 ppIRType(t3);
1744 VG_(printf)(" x ");
1745 ppIRType(t4);
1746 VG_(printf)(" -> ");
1747 ppIRType(finalVty);
1748 VG_(printf)("\n");
1749 }
1750
1751 tl_assert(0);
1752}
1753
1754
sewardj95448072004-11-22 20:19:51 +00001755/* Do the lazy propagation game from a null-terminated vector of
1756 atoms. This is presumably the arguments to a helper call, so the
1757 IRCallee info is also supplied in order that we can know which
1758 arguments should be ignored (via the .mcx_mask field).
1759*/
1760static
1761IRAtom* mkLazyN ( MCEnv* mce,
1762 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1763{
sewardj4cc684b2007-08-25 23:09:36 +00001764 Int i;
sewardj95448072004-11-22 20:19:51 +00001765 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001766 IRAtom* curr;
1767 IRType mergeTy;
sewardj99430032011-05-04 09:09:31 +00001768 Bool mergeTy64 = True;
sewardj4cc684b2007-08-25 23:09:36 +00001769
1770 /* Decide on the type of the merge intermediary. If all relevant
1771 args are I64, then it's I64. In all other circumstances, use
1772 I32. */
1773 for (i = 0; exprvec[i]; i++) {
1774 tl_assert(i < 32);
1775 tl_assert(isOriginalAtom(mce, exprvec[i]));
1776 if (cee->mcx_mask & (1<<i))
1777 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001778 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001779 mergeTy64 = False;
1780 }
1781
1782 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1783 curr = definedOfType(mergeTy);
1784
sewardj95448072004-11-22 20:19:51 +00001785 for (i = 0; exprvec[i]; i++) {
1786 tl_assert(i < 32);
1787 tl_assert(isOriginalAtom(mce, exprvec[i]));
1788 /* Only take notice of this arg if the callee's mc-exclusion
1789 mask does not say it is to be excluded. */
1790 if (cee->mcx_mask & (1<<i)) {
1791 /* the arg is to be excluded from definedness checking. Do
1792 nothing. */
1793 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1794 } else {
1795 /* calculate the arg's definedness, and pessimistically merge
1796 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001797 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1798 curr = mergeTy64
1799 ? mkUifU64(mce, here, curr)
1800 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001801 }
1802 }
1803 return mkPCastTo(mce, finalVtype, curr );
1804}
1805
1806
1807/*------------------------------------------------------------*/
1808/*--- Generating expensive sequences for exact carry-chain ---*/
1809/*--- propagation in add/sub and related operations. ---*/
1810/*------------------------------------------------------------*/
1811
/* Bit-precise (rather than pessimistic) definedness propagation for
   integer add/sub: compute the result with undefined input bits all
   forced to 0 and again with them forced to 1; bits where the two
   disagree were influenced by the undefinedness via the carry chain.
   qaa/qbb are the shadow values for the originals aa/bb; 'add'
   selects add vs sub; ty is I32 or I64. */
static
IRAtom* expensiveAddSub ( MCEnv* mce,
                          Bool add,
                          IRType ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the operator family for the working type. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (aa with all undefined bits forced to 0)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa    (aa with all undefined bits forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (for subtraction, the minimal result subtracts the maximal bb,
      // and vice versa)
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1897
1898
sewardj4cfa81b2012-11-08 10:58:16 +00001899static
1900IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1901 IRAtom* atom, IRAtom* vatom )
1902{
1903 IRType ty;
1904 IROp xorOp, subOp, andOp;
1905 IRExpr *one;
1906 IRAtom *improver, *improved;
1907 tl_assert(isShadowAtom(mce,vatom));
1908 tl_assert(isOriginalAtom(mce,atom));
1909 tl_assert(sameKindedAtoms(atom,vatom));
1910
1911 switch (czop) {
1912 case Iop_Ctz32:
1913 ty = Ity_I32;
1914 xorOp = Iop_Xor32;
1915 subOp = Iop_Sub32;
1916 andOp = Iop_And32;
1917 one = mkU32(1);
1918 break;
1919 case Iop_Ctz64:
1920 ty = Ity_I64;
1921 xorOp = Iop_Xor64;
1922 subOp = Iop_Sub64;
1923 andOp = Iop_And64;
1924 one = mkU64(1);
1925 break;
1926 default:
1927 ppIROp(czop);
1928 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1929 }
1930
1931 // improver = atom ^ (atom - 1)
1932 //
1933 // That is, improver has its low ctz(atom) bits equal to one;
1934 // higher bits (if any) equal to zero.
1935 improver = assignNew('V', mce,ty,
1936 binop(xorOp,
1937 atom,
1938 assignNew('V', mce, ty,
1939 binop(subOp, atom, one))));
1940
1941 // improved = vatom & improver
1942 //
1943 // That is, treat any V bits above the first ctz(atom) bits as
1944 // "defined".
1945 improved = assignNew('V', mce, ty,
1946 binop(andOp, vatom, improver));
1947
1948 // Return pessimizing cast of improved.
1949 return mkPCastTo(mce, ty, improved);
1950}
1951
1952
sewardj95448072004-11-22 20:19:51 +00001953/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001954/*--- Scalar shifts. ---*/
1955/*------------------------------------------------------------*/
1956
1957/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1958 idea is to shift the definedness bits by the original shift amount.
1959 This introduces 0s ("defined") in new positions for left shifts and
1960 unsigned right shifts, and copies the top definedness bit for
1961 signed right shifts. So, conveniently, applying the original shift
1962 operator to the definedness bits for the left arg is exactly the
1963 right thing to do:
1964
1965 (qaa << bb)
1966
1967 However if the shift amount is undefined then the whole result
1968 is undefined. Hence need:
1969
1970 (qaa << bb) `UifU` PCast(qbb)
1971
1972 If the shift amount bb is a literal than qbb will say 'all defined'
1973 and the UifU and PCast will get folded out by post-instrumentation
1974 optimisation.
1975*/
1976static IRAtom* scalarShift ( MCEnv* mce,
1977 IRType ty,
1978 IROp original_op,
1979 IRAtom* qaa, IRAtom* qbb,
1980 IRAtom* aa, IRAtom* bb )
1981{
1982 tl_assert(isShadowAtom(mce,qaa));
1983 tl_assert(isShadowAtom(mce,qbb));
1984 tl_assert(isOriginalAtom(mce,aa));
1985 tl_assert(isOriginalAtom(mce,bb));
1986 tl_assert(sameKindedAtoms(qaa,aa));
1987 tl_assert(sameKindedAtoms(qbb,bb));
1988 return
1989 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001990 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001991 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001992 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001993 mkPCastTo(mce, ty, qbb)
1994 )
1995 );
1996}
1997
1998
1999/*------------------------------------------------------------*/
2000/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00002001/*------------------------------------------------------------*/
2002
sewardja1d93302004-12-12 16:45:06 +00002003/* Vector pessimisation -- pessimise within each lane individually. */
2004
2005static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2006{
sewardj7cf4e6b2008-05-01 20:24:26 +00002007 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00002008}
2009
2010static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2011{
sewardj7cf4e6b2008-05-01 20:24:26 +00002012 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00002013}
2014
2015static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2016{
sewardj7cf4e6b2008-05-01 20:24:26 +00002017 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00002018}
2019
2020static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2021{
sewardj7cf4e6b2008-05-01 20:24:26 +00002022 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00002023}
2024
sewardj350e8f72012-06-25 07:52:15 +00002025static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2026{
2027 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2028}
2029
2030static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2031{
2032 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2033}
2034
sewardjacd2e912005-01-13 19:17:06 +00002035static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2036{
sewardj7cf4e6b2008-05-01 20:24:26 +00002037 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00002038}
2039
sewardja2f30952013-03-27 11:40:02 +00002040static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2041{
2042 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2043}
2044
sewardjacd2e912005-01-13 19:17:06 +00002045static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2046{
sewardj7cf4e6b2008-05-01 20:24:26 +00002047 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00002048}
2049
sewardja2f30952013-03-27 11:40:02 +00002050static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2051{
2052 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2053}
2054
sewardjacd2e912005-01-13 19:17:06 +00002055static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2056{
sewardj7cf4e6b2008-05-01 20:24:26 +00002057 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00002058}
2059
sewardjc678b852010-09-22 00:58:51 +00002060static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2061{
2062 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2063}
2064
2065static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2066{
2067 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2068}
2069
sewardja1d93302004-12-12 16:45:06 +00002070
sewardj3245c912004-12-10 14:58:26 +00002071/* Here's a simple scheme capable of handling ops derived from SSE1
2072 code and while only generating ops that can be efficiently
2073 implemented in SSE1. */
2074
2075/* All-lanes versions are straightforward:
2076
sewardj20d38f22005-02-07 23:50:18 +00002077 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00002078
2079 unary32Fx4(x,y) ==> PCast32x4(x#)
2080
2081 Lowest-lane-only versions are more complex:
2082
sewardj20d38f22005-02-07 23:50:18 +00002083 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002084 x#,
sewardj20d38f22005-02-07 23:50:18 +00002085 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00002086 )
2087
2088 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00002089 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00002090 obvious scheme of taking the bottom 32 bits of each operand
2091 and doing a 32-bit UifU. Basically since UifU is fast and
2092 chopping lanes off vector values is slow.
2093
2094 Finally:
2095
sewardj20d38f22005-02-07 23:50:18 +00002096 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002097 x#,
sewardj20d38f22005-02-07 23:50:18 +00002098 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00002099 )
2100
2101 Where:
2102
2103 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2104 PCast32x4(v#) = CmpNEZ32x4(v#)
2105*/
2106
2107static
2108IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2109{
2110 IRAtom* at;
2111 tl_assert(isShadowAtom(mce, vatomX));
2112 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002113 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002114 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002115 return at;
2116}
2117
2118static
2119IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2120{
2121 IRAtom* at;
2122 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002123 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002124 return at;
2125}
2126
2127static
2128IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2129{
2130 IRAtom* at;
2131 tl_assert(isShadowAtom(mce, vatomX));
2132 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002133 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002134 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002135 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002136 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002137 return at;
2138}
2139
2140static
2141IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2142{
2143 IRAtom* at;
2144 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002145 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002146 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002147 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002148 return at;
2149}
2150
sewardj0b070592004-12-10 21:44:22 +00002151/* --- ... and ... 64Fx2 versions of the same ... --- */
2152
2153static
2154IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2155{
2156 IRAtom* at;
2157 tl_assert(isShadowAtom(mce, vatomX));
2158 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002159 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002160 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002161 return at;
2162}
2163
2164static
2165IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2166{
2167 IRAtom* at;
2168 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002169 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002170 return at;
2171}
2172
2173static
2174IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2175{
2176 IRAtom* at;
2177 tl_assert(isShadowAtom(mce, vatomX));
2178 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002179 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002180 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002181 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002182 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002183 return at;
2184}
2185
2186static
2187IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2188{
2189 IRAtom* at;
2190 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002191 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002192 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002193 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002194 return at;
2195}
2196
sewardj57f92b02010-08-22 11:54:14 +00002197/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2198
2199static
2200IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2201{
2202 IRAtom* at;
2203 tl_assert(isShadowAtom(mce, vatomX));
2204 tl_assert(isShadowAtom(mce, vatomY));
2205 at = mkUifU64(mce, vatomX, vatomY);
2206 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2207 return at;
2208}
2209
2210static
2211IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2212{
2213 IRAtom* at;
2214 tl_assert(isShadowAtom(mce, vatomX));
2215 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2216 return at;
2217}
2218
sewardj350e8f72012-06-25 07:52:15 +00002219/* --- ... and ... 64Fx4 versions of the same ... --- */
2220
2221static
2222IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2223{
2224 IRAtom* at;
2225 tl_assert(isShadowAtom(mce, vatomX));
2226 tl_assert(isShadowAtom(mce, vatomY));
2227 at = mkUifUV256(mce, vatomX, vatomY);
2228 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2229 return at;
2230}
2231
2232static
2233IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2234{
2235 IRAtom* at;
2236 tl_assert(isShadowAtom(mce, vatomX));
2237 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2238 return at;
2239}
2240
2241/* --- ... and ... 32Fx8 versions of the same ... --- */
2242
2243static
2244IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2245{
2246 IRAtom* at;
2247 tl_assert(isShadowAtom(mce, vatomX));
2248 tl_assert(isShadowAtom(mce, vatomY));
2249 at = mkUifUV256(mce, vatomX, vatomY);
2250 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2251 return at;
2252}
2253
2254static
2255IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2256{
2257 IRAtom* at;
2258 tl_assert(isShadowAtom(mce, vatomX));
2259 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2260 return at;
2261}
2262
sewardj1eb272f2014-01-26 18:36:52 +00002263/* --- 64Fx2 binary FP ops, with rounding mode --- */
2264
2265static
2266IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2267 IRAtom* vatomX, IRAtom* vatomY )
2268{
2269 /* This is the same as binary64Fx2, except that we subsequently
2270 pessimise vRM (definedness of the rounding mode), widen to 128
2271 bits and UifU it into the result. As with the scalar cases, if
2272 the RM is a constant then it is defined and so this extra bit
2273 will get constant-folded out later. */
2274 // "do" the vector args
2275 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2276 // PCast the RM, and widen it to 128 bits
2277 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2278 // Roll it into the result
2279 t1 = mkUifUV128(mce, t1, t2);
2280 return t1;
2281}
2282
2283/* --- ... and ... 32Fx4 versions of the same --- */
2284
2285static
2286IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2287 IRAtom* vatomX, IRAtom* vatomY )
2288{
2289 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2290 // PCast the RM, and widen it to 128 bits
2291 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2292 // Roll it into the result
2293 t1 = mkUifUV128(mce, t1, t2);
2294 return t1;
2295}
2296
2297/* --- ... and ... 64Fx4 versions of the same --- */
2298
2299static
2300IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2301 IRAtom* vatomX, IRAtom* vatomY )
2302{
2303 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2304 // PCast the RM, and widen it to 256 bits
2305 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2306 // Roll it into the result
2307 t1 = mkUifUV256(mce, t1, t2);
2308 return t1;
2309}
2310
2311/* --- ... and ... 32Fx8 versions of the same --- */
2312
2313static
2314IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2315 IRAtom* vatomX, IRAtom* vatomY )
2316{
2317 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2318 // PCast the RM, and widen it to 256 bits
2319 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2320 // Roll it into the result
2321 t1 = mkUifUV256(mce, t1, t2);
2322 return t1;
2323}
2324
2325
sewardja1d93302004-12-12 16:45:06 +00002326/* --- --- Vector saturated narrowing --- --- */
2327
sewardjb5a29232011-10-22 09:29:41 +00002328/* We used to do something very clever here, but on closer inspection
2329 (2011-Jun-15), and in particular bug #279698, it turns out to be
2330 wrong. Part of the problem came from the fact that for a long
2331 time, the IR primops to do with saturated narrowing were
2332 underspecified and managed to confuse multiple cases which needed
2333 to be separate: the op names had a signedness qualifier, but in
2334 fact the source and destination signednesses needed to be specified
2335 independently, so the op names really need two independent
2336 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002337
sewardjb5a29232011-10-22 09:29:41 +00002338 As of 2011-Jun-15 (ish) the underspecification was sorted out
2339 properly. The incorrect instrumentation remained, though. That
2340 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002341
sewardjb5a29232011-10-22 09:29:41 +00002342 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002343
sewardjb5a29232011-10-22 09:29:41 +00002344 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2345 number of lanes, X is the source lane width and signedness, and Y
2346 is the destination lane width and signedness. In all cases the
2347 destination lane width is half the source lane width, so the names
2348 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002349
sewardjb5a29232011-10-22 09:29:41 +00002350 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2351 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002352
sewardjb5a29232011-10-22 09:29:41 +00002353 Let Vanilla(OP) be a function that takes OP, one of these
2354 saturating narrowing ops, and produces the same "shaped" narrowing
2355 op which is not saturating, but merely dumps the most significant
2356 bits. "same shape" means that the lane numbers and widths are the
2357 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002358
sewardjb5a29232011-10-22 09:29:41 +00002359 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2360 = Iop_NarrowBin32to16x8,
2361 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2362 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002363
sewardjb5a29232011-10-22 09:29:41 +00002364 So, with that in place, the scheme is simple, and it is simple to
2365 pessimise each lane individually and then apply Vanilla(OP) so as
2366 to get the result in the right "shape". If the original OP is
2367 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002368
sewardjb5a29232011-10-22 09:29:41 +00002369 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002370
sewardjb5a29232011-10-22 09:29:41 +00002371 or for the case when OP is unary (Iop_QNarrowUn*)
2372
2373 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002374*/
2375static
sewardjb5a29232011-10-22 09:29:41 +00002376IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2377{
2378 switch (qnarrowOp) {
2379 /* Binary: (128, 128) -> 128 */
2380 case Iop_QNarrowBin16Sto8Ux16:
2381 case Iop_QNarrowBin16Sto8Sx16:
2382 case Iop_QNarrowBin16Uto8Ux16:
carll62770672013-10-01 15:50:09 +00002383 case Iop_QNarrowBin64Sto32Sx4:
2384 case Iop_QNarrowBin64Uto32Ux4:
sewardjb5a29232011-10-22 09:29:41 +00002385 return Iop_NarrowBin16to8x16;
2386 case Iop_QNarrowBin32Sto16Ux8:
2387 case Iop_QNarrowBin32Sto16Sx8:
2388 case Iop_QNarrowBin32Uto16Ux8:
2389 return Iop_NarrowBin32to16x8;
2390 /* Binary: (64, 64) -> 64 */
2391 case Iop_QNarrowBin32Sto16Sx4:
2392 return Iop_NarrowBin32to16x4;
2393 case Iop_QNarrowBin16Sto8Ux8:
2394 case Iop_QNarrowBin16Sto8Sx8:
2395 return Iop_NarrowBin16to8x8;
2396 /* Unary: 128 -> 64 */
2397 case Iop_QNarrowUn64Uto32Ux2:
2398 case Iop_QNarrowUn64Sto32Sx2:
2399 case Iop_QNarrowUn64Sto32Ux2:
2400 return Iop_NarrowUn64to32x2;
2401 case Iop_QNarrowUn32Uto16Ux4:
2402 case Iop_QNarrowUn32Sto16Sx4:
2403 case Iop_QNarrowUn32Sto16Ux4:
2404 return Iop_NarrowUn32to16x4;
2405 case Iop_QNarrowUn16Uto8Ux8:
2406 case Iop_QNarrowUn16Sto8Sx8:
2407 case Iop_QNarrowUn16Sto8Ux8:
2408 return Iop_NarrowUn16to8x8;
2409 default:
2410 ppIROp(qnarrowOp);
2411 VG_(tool_panic)("vanillaNarrowOpOfShape");
2412 }
2413}
2414
2415static
sewardj7ee7d852011-06-16 11:37:21 +00002416IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2417 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002418{
2419 IRAtom *at1, *at2, *at3;
2420 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2421 switch (narrow_op) {
carll62770672013-10-01 15:50:09 +00002422 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2423 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
sewardj7ee7d852011-06-16 11:37:21 +00002424 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2425 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2426 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2427 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2428 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2429 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2430 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002431 }
sewardjb5a29232011-10-22 09:29:41 +00002432 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002433 tl_assert(isShadowAtom(mce,vatom1));
2434 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002435 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2436 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002437 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002438 return at3;
2439}
2440
sewardjacd2e912005-01-13 19:17:06 +00002441static
sewardj7ee7d852011-06-16 11:37:21 +00002442IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2443 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002444{
2445 IRAtom *at1, *at2, *at3;
2446 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2447 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002448 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2449 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2450 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2451 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002452 }
sewardjb5a29232011-10-22 09:29:41 +00002453 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002454 tl_assert(isShadowAtom(mce,vatom1));
2455 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002456 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2457 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002458 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002459 return at3;
2460}
2461
sewardj57f92b02010-08-22 11:54:14 +00002462static
sewardjb5a29232011-10-22 09:29:41 +00002463IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002464 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002465{
2466 IRAtom *at1, *at2;
2467 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002468 tl_assert(isShadowAtom(mce,vatom1));
2469 /* For vanilla narrowing (non-saturating), we can just apply
2470 the op directly to the V bits. */
2471 switch (narrow_op) {
2472 case Iop_NarrowUn16to8x8:
2473 case Iop_NarrowUn32to16x4:
2474 case Iop_NarrowUn64to32x2:
2475 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2476 return at1;
2477 default:
2478 break; /* Do Plan B */
2479 }
2480 /* Plan B: for ops that involve a saturation operation on the args,
2481 we must PCast before the vanilla narrow. */
2482 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002483 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2484 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2485 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2486 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2487 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2488 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2489 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2490 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2491 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2492 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002493 }
sewardjb5a29232011-10-22 09:29:41 +00002494 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002495 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002496 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002497 return at2;
2498}
2499
2500static
sewardj7ee7d852011-06-16 11:37:21 +00002501IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2502 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002503{
2504 IRAtom *at1, *at2;
2505 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2506 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002507 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2508 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2509 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2510 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2511 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2512 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2513 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002514 }
2515 tl_assert(isShadowAtom(mce,vatom1));
2516 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2517 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2518 return at2;
2519}
2520
sewardja1d93302004-12-12 16:45:06 +00002521
2522/* --- --- Vector integer arithmetic --- --- */
2523
2524/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002525
sewardja2f30952013-03-27 11:40:02 +00002526/* --- V256-bit versions --- */
2527
2528static
2529IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2530{
2531 IRAtom* at;
2532 at = mkUifUV256(mce, vatom1, vatom2);
2533 at = mkPCast8x32(mce, at);
2534 return at;
2535}
2536
2537static
2538IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2539{
2540 IRAtom* at;
2541 at = mkUifUV256(mce, vatom1, vatom2);
2542 at = mkPCast16x16(mce, at);
2543 return at;
2544}
2545
2546static
2547IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2548{
2549 IRAtom* at;
2550 at = mkUifUV256(mce, vatom1, vatom2);
2551 at = mkPCast32x8(mce, at);
2552 return at;
2553}
2554
2555static
2556IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2557{
2558 IRAtom* at;
2559 at = mkUifUV256(mce, vatom1, vatom2);
2560 at = mkPCast64x4(mce, at);
2561 return at;
2562}
2563
sewardj20d38f22005-02-07 23:50:18 +00002564/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002565
sewardja1d93302004-12-12 16:45:06 +00002566static
2567IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2568{
2569 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002570 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002571 at = mkPCast8x16(mce, at);
2572 return at;
2573}
2574
2575static
2576IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2577{
2578 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002579 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002580 at = mkPCast16x8(mce, at);
2581 return at;
2582}
2583
2584static
2585IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2586{
2587 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002588 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002589 at = mkPCast32x4(mce, at);
2590 return at;
2591}
2592
2593static
2594IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2595{
2596 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002597 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002598 at = mkPCast64x2(mce, at);
2599 return at;
2600}
sewardj3245c912004-12-10 14:58:26 +00002601
sewardjacd2e912005-01-13 19:17:06 +00002602/* --- 64-bit versions --- */
2603
2604static
2605IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2606{
2607 IRAtom* at;
2608 at = mkUifU64(mce, vatom1, vatom2);
2609 at = mkPCast8x8(mce, at);
2610 return at;
2611}
2612
2613static
2614IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2615{
2616 IRAtom* at;
2617 at = mkUifU64(mce, vatom1, vatom2);
2618 at = mkPCast16x4(mce, at);
2619 return at;
2620}
2621
2622static
2623IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2624{
2625 IRAtom* at;
2626 at = mkUifU64(mce, vatom1, vatom2);
2627 at = mkPCast32x2(mce, at);
2628 return at;
2629}
2630
sewardj57f92b02010-08-22 11:54:14 +00002631static
2632IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2633{
2634 IRAtom* at;
2635 at = mkUifU64(mce, vatom1, vatom2);
2636 at = mkPCastTo(mce, Ity_I64, at);
2637 return at;
2638}
2639
sewardjc678b852010-09-22 00:58:51 +00002640/* --- 32-bit versions --- */
2641
2642static
2643IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2644{
2645 IRAtom* at;
2646 at = mkUifU32(mce, vatom1, vatom2);
2647 at = mkPCast8x4(mce, at);
2648 return at;
2649}
2650
2651static
2652IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2653{
2654 IRAtom* at;
2655 at = mkUifU32(mce, vatom1, vatom2);
2656 at = mkPCast16x2(mce, at);
2657 return at;
2658}
2659
sewardj3245c912004-12-10 14:58:26 +00002660
2661/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002662/*--- Generate shadow values from all kinds of IRExprs. ---*/
2663/*------------------------------------------------------------*/
2664
/* Instrument a quaternary (4-argument) IR operation: shadow-compute
   all four args, then combine their definedness according to the op.
   Returns the shadow (V-bit) atom for the result. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Recursively compute shadow values for the argument atoms. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         /* Scalar FMA: lazily combine all four args' definedness
            into a 64-bit shadow. */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         /* Pure rearrangement: apply the same op to the shadows. */
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2711
2712
/* Instrument a ternary (3-argument) IR operation: shadow-compute the
   three args, then combine their definedness according to the op.
   For most scalar FP/DFP ops the first arg is a rounding mode and
   the result is computed lazily from all three shadows; for shift-
   and element-ops with an immediate, the immediate must be fully
   defined and is passed through unchanged. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Recursively compute shadow values for the argument atoms. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) steers data, so it must be
            fully defined; complain if not, then pass the original
            (not the shadow) through. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Here the lane index is atom2; it must be defined. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      /* BCDIops */
      case Iop_BCDAdd:
      case Iop_BCDSub:
         /* atom3 is the PS (preferred sign) immediate; must be
            defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));

      /* Vector FP with rounding mode as the first arg */
      case Iop_Add64Fx2:
      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Div64Fx2:
         return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Div32Fx4:
         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add64Fx4:
      case Iop_Sub64Fx4:
      case Iop_Mul64Fx4:
      case Iop_Div64Fx4:
         return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx8:
      case Iop_Sub32Fx8:
      case Iop_Mul32Fx8:
      case Iop_Div32Fx8:
         return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2827
2828
2829static
sewardj95448072004-11-22 20:19:51 +00002830IRAtom* expr2vbits_Binop ( MCEnv* mce,
2831 IROp op,
2832 IRAtom* atom1, IRAtom* atom2 )
2833{
2834 IRType and_or_ty;
2835 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2836 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2837 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2838
2839 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2840 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2841
2842 tl_assert(isOriginalAtom(mce,atom1));
2843 tl_assert(isOriginalAtom(mce,atom2));
2844 tl_assert(isShadowAtom(mce,vatom1));
2845 tl_assert(isShadowAtom(mce,vatom2));
2846 tl_assert(sameKindedAtoms(atom1,vatom1));
2847 tl_assert(sameKindedAtoms(atom2,vatom2));
2848 switch (op) {
2849
sewardjc678b852010-09-22 00:58:51 +00002850 /* 32-bit SIMD */
2851
2852 case Iop_Add16x2:
2853 case Iop_HAdd16Ux2:
2854 case Iop_HAdd16Sx2:
2855 case Iop_Sub16x2:
2856 case Iop_HSub16Ux2:
2857 case Iop_HSub16Sx2:
2858 case Iop_QAdd16Sx2:
2859 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002860 case Iop_QSub16Ux2:
sewardj7a370652013-07-04 20:37:33 +00002861 case Iop_QAdd16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002862 return binary16Ix2(mce, vatom1, vatom2);
2863
2864 case Iop_Add8x4:
2865 case Iop_HAdd8Ux4:
2866 case Iop_HAdd8Sx4:
2867 case Iop_Sub8x4:
2868 case Iop_HSub8Ux4:
2869 case Iop_HSub8Sx4:
2870 case Iop_QSub8Ux4:
2871 case Iop_QAdd8Ux4:
2872 case Iop_QSub8Sx4:
2873 case Iop_QAdd8Sx4:
2874 return binary8Ix4(mce, vatom1, vatom2);
2875
sewardjacd2e912005-01-13 19:17:06 +00002876 /* 64-bit SIMD */
2877
sewardj57f92b02010-08-22 11:54:14 +00002878 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002879 case Iop_ShrN16x4:
2880 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002881 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002882 case Iop_SarN16x4:
2883 case Iop_SarN32x2:
2884 case Iop_ShlN16x4:
2885 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002886 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002887 /* Same scheme as with all other shifts. */
sewardjb9e6d242013-05-11 13:42:08 +00002888 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002889 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002890
sewardj7ee7d852011-06-16 11:37:21 +00002891 case Iop_QNarrowBin32Sto16Sx4:
2892 case Iop_QNarrowBin16Sto8Sx8:
2893 case Iop_QNarrowBin16Sto8Ux8:
2894 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002895
2896 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002897 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002898 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002899 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002900 case Iop_Avg8Ux8:
2901 case Iop_QSub8Sx8:
2902 case Iop_QSub8Ux8:
2903 case Iop_Sub8x8:
2904 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002905 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002906 case Iop_CmpEQ8x8:
2907 case Iop_QAdd8Sx8:
2908 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002909 case Iop_QSal8x8:
2910 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002911 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002912 case Iop_Mul8x8:
2913 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002914 return binary8Ix8(mce, vatom1, vatom2);
2915
2916 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002917 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002918 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002919 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002920 case Iop_Avg16Ux4:
2921 case Iop_QSub16Ux4:
2922 case Iop_QSub16Sx4:
2923 case Iop_Sub16x4:
2924 case Iop_Mul16x4:
2925 case Iop_MulHi16Sx4:
2926 case Iop_MulHi16Ux4:
2927 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002928 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002929 case Iop_CmpEQ16x4:
2930 case Iop_QAdd16Sx4:
2931 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002932 case Iop_QSal16x4:
2933 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002934 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002935 case Iop_QDMulHi16Sx4:
2936 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002937 return binary16Ix4(mce, vatom1, vatom2);
2938
2939 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002940 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002941 case Iop_Max32Sx2:
2942 case Iop_Max32Ux2:
2943 case Iop_Min32Sx2:
2944 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002945 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002946 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002947 case Iop_CmpEQ32x2:
2948 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002949 case Iop_QAdd32Ux2:
2950 case Iop_QAdd32Sx2:
2951 case Iop_QSub32Ux2:
2952 case Iop_QSub32Sx2:
2953 case Iop_QSal32x2:
2954 case Iop_QShl32x2:
2955 case Iop_QDMulHi32Sx2:
2956 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002957 return binary32Ix2(mce, vatom1, vatom2);
2958
sewardj57f92b02010-08-22 11:54:14 +00002959 case Iop_QSub64Ux1:
2960 case Iop_QSub64Sx1:
2961 case Iop_QAdd64Ux1:
2962 case Iop_QAdd64Sx1:
2963 case Iop_QSal64x1:
2964 case Iop_QShl64x1:
2965 case Iop_Sal64x1:
2966 return binary64Ix1(mce, vatom1, vatom2);
2967
2968 case Iop_QShlN8Sx8:
2969 case Iop_QShlN8x8:
2970 case Iop_QSalN8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002971 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002972 return mkPCast8x8(mce, vatom1);
2973
2974 case Iop_QShlN16Sx4:
2975 case Iop_QShlN16x4:
2976 case Iop_QSalN16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002977 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002978 return mkPCast16x4(mce, vatom1);
2979
2980 case Iop_QShlN32Sx2:
2981 case Iop_QShlN32x2:
2982 case Iop_QSalN32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002983 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002984 return mkPCast32x2(mce, vatom1);
2985
2986 case Iop_QShlN64Sx1:
2987 case Iop_QShlN64x1:
2988 case Iop_QSalN64x1:
sewardjb9e6d242013-05-11 13:42:08 +00002989 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002990 return mkPCast32x2(mce, vatom1);
2991
2992 case Iop_PwMax32Sx2:
2993 case Iop_PwMax32Ux2:
2994 case Iop_PwMin32Sx2:
2995 case Iop_PwMin32Ux2:
2996 case Iop_PwMax32Fx2:
2997 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002998 return assignNew('V', mce, Ity_I64,
2999 binop(Iop_PwMax32Ux2,
3000 mkPCast32x2(mce, vatom1),
3001 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003002
3003 case Iop_PwMax16Sx4:
3004 case Iop_PwMax16Ux4:
3005 case Iop_PwMin16Sx4:
3006 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00003007 return assignNew('V', mce, Ity_I64,
3008 binop(Iop_PwMax16Ux4,
3009 mkPCast16x4(mce, vatom1),
3010 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003011
3012 case Iop_PwMax8Sx8:
3013 case Iop_PwMax8Ux8:
3014 case Iop_PwMin8Sx8:
3015 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00003016 return assignNew('V', mce, Ity_I64,
3017 binop(Iop_PwMax8Ux8,
3018 mkPCast8x8(mce, vatom1),
3019 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003020
3021 case Iop_PwAdd32x2:
3022 case Iop_PwAdd32Fx2:
3023 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00003024 assignNew('V', mce, Ity_I64,
3025 binop(Iop_PwAdd32x2,
3026 mkPCast32x2(mce, vatom1),
3027 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003028
3029 case Iop_PwAdd16x4:
3030 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00003031 assignNew('V', mce, Ity_I64,
3032 binop(op, mkPCast16x4(mce, vatom1),
3033 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003034
3035 case Iop_PwAdd8x8:
3036 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00003037 assignNew('V', mce, Ity_I64,
3038 binop(op, mkPCast8x8(mce, vatom1),
3039 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003040
3041 case Iop_Shl8x8:
3042 case Iop_Shr8x8:
3043 case Iop_Sar8x8:
3044 case Iop_Sal8x8:
3045 return mkUifU64(mce,
3046 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3047 mkPCast8x8(mce,vatom2)
3048 );
3049
3050 case Iop_Shl16x4:
3051 case Iop_Shr16x4:
3052 case Iop_Sar16x4:
3053 case Iop_Sal16x4:
3054 return mkUifU64(mce,
3055 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3056 mkPCast16x4(mce,vatom2)
3057 );
3058
3059 case Iop_Shl32x2:
3060 case Iop_Shr32x2:
3061 case Iop_Sar32x2:
3062 case Iop_Sal32x2:
3063 return mkUifU64(mce,
3064 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3065 mkPCast32x2(mce,vatom2)
3066 );
3067
sewardjacd2e912005-01-13 19:17:06 +00003068 /* 64-bit data-steering */
3069 case Iop_InterleaveLO32x2:
3070 case Iop_InterleaveLO16x4:
3071 case Iop_InterleaveLO8x8:
3072 case Iop_InterleaveHI32x2:
3073 case Iop_InterleaveHI16x4:
3074 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00003075 case Iop_CatOddLanes8x8:
3076 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00003077 case Iop_CatOddLanes16x4:
3078 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00003079 case Iop_InterleaveOddLanes8x8:
3080 case Iop_InterleaveEvenLanes8x8:
3081 case Iop_InterleaveOddLanes16x4:
3082 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003083 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00003084
sewardj57f92b02010-08-22 11:54:14 +00003085 case Iop_GetElem8x8:
sewardjb9e6d242013-05-11 13:42:08 +00003086 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003087 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3088 case Iop_GetElem16x4:
sewardjb9e6d242013-05-11 13:42:08 +00003089 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003090 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3091 case Iop_GetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00003092 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003093 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3094
sewardj114a9172008-02-09 01:49:32 +00003095 /* Perm8x8: rearrange values in left arg using steering values
3096 from right arg. So rearrange the vbits in the same way but
3097 pessimise wrt steering values. */
3098 case Iop_Perm8x8:
3099 return mkUifU64(
3100 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003101 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00003102 mkPCast8x8(mce, vatom2)
3103 );
3104
sewardj20d38f22005-02-07 23:50:18 +00003105 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00003106
sewardj57f92b02010-08-22 11:54:14 +00003107 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00003108 case Iop_ShrN16x8:
3109 case Iop_ShrN32x4:
3110 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00003111 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00003112 case Iop_SarN16x8:
3113 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00003114 case Iop_SarN64x2:
3115 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00003116 case Iop_ShlN16x8:
3117 case Iop_ShlN32x4:
3118 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00003119 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3120 this is wrong now, scalar shifts are done properly lazily.
3121 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003122 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003123 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00003124
sewardjcbf8be72005-11-10 18:34:41 +00003125 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00003126 case Iop_Shl8x16:
3127 case Iop_Shr8x16:
3128 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00003129 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00003130 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00003131 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003132 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003133 mkPCast8x16(mce,vatom2)
3134 );
3135
3136 case Iop_Shl16x8:
3137 case Iop_Shr16x8:
3138 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00003139 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00003140 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00003141 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003142 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003143 mkPCast16x8(mce,vatom2)
3144 );
3145
3146 case Iop_Shl32x4:
3147 case Iop_Shr32x4:
3148 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00003149 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00003150 case Iop_Rol32x4:
carll62770672013-10-01 15:50:09 +00003151 case Iop_Rol64x2:
sewardj43d60752005-11-10 18:13:01 +00003152 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003153 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003154 mkPCast32x4(mce,vatom2)
3155 );
3156
sewardj57f92b02010-08-22 11:54:14 +00003157 case Iop_Shl64x2:
3158 case Iop_Shr64x2:
3159 case Iop_Sar64x2:
3160 case Iop_Sal64x2:
3161 return mkUifUV128(mce,
3162 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3163 mkPCast64x2(mce,vatom2)
3164 );
3165
3166 case Iop_F32ToFixed32Ux4_RZ:
3167 case Iop_F32ToFixed32Sx4_RZ:
3168 case Iop_Fixed32UToF32x4_RN:
3169 case Iop_Fixed32SToF32x4_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003170 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003171 return mkPCast32x4(mce, vatom1);
3172
3173 case Iop_F32ToFixed32Ux2_RZ:
3174 case Iop_F32ToFixed32Sx2_RZ:
3175 case Iop_Fixed32UToF32x2_RN:
3176 case Iop_Fixed32SToF32x2_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003177 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003178 return mkPCast32x2(mce, vatom1);
3179
sewardja1d93302004-12-12 16:45:06 +00003180 case Iop_QSub8Ux16:
3181 case Iop_QSub8Sx16:
3182 case Iop_Sub8x16:
3183 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003184 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003185 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003186 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003187 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00003188 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00003189 case Iop_CmpEQ8x16:
3190 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003191 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003192 case Iop_QAdd8Ux16:
3193 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00003194 case Iop_QSal8x16:
3195 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00003196 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00003197 case Iop_Mul8x16:
3198 case Iop_PolynomialMul8x16:
carll24e40de2013-10-15 18:13:21 +00003199 case Iop_PolynomialMulAdd8x16:
sewardja1d93302004-12-12 16:45:06 +00003200 return binary8Ix16(mce, vatom1, vatom2);
3201
3202 case Iop_QSub16Ux8:
3203 case Iop_QSub16Sx8:
3204 case Iop_Sub16x8:
3205 case Iop_Mul16x8:
3206 case Iop_MulHi16Sx8:
3207 case Iop_MulHi16Ux8:
3208 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003209 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003210 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003211 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003212 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003213 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003214 case Iop_CmpEQ16x8:
3215 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003216 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003217 case Iop_QAdd16Ux8:
3218 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00003219 case Iop_QSal16x8:
3220 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00003221 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00003222 case Iop_QDMulHi16Sx8:
3223 case Iop_QRDMulHi16Sx8:
carll24e40de2013-10-15 18:13:21 +00003224 case Iop_PolynomialMulAdd16x8:
sewardja1d93302004-12-12 16:45:06 +00003225 return binary16Ix8(mce, vatom1, vatom2);
3226
3227 case Iop_Sub32x4:
3228 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00003229 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00003230 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00003231 case Iop_QAdd32Sx4:
3232 case Iop_QAdd32Ux4:
3233 case Iop_QSub32Sx4:
3234 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00003235 case Iop_QSal32x4:
3236 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00003237 case Iop_Avg32Ux4:
3238 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003239 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00003240 case Iop_Max32Ux4:
3241 case Iop_Max32Sx4:
3242 case Iop_Min32Ux4:
3243 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003244 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003245 case Iop_QDMulHi32Sx4:
3246 case Iop_QRDMulHi32Sx4:
carll24e40de2013-10-15 18:13:21 +00003247 case Iop_PolynomialMulAdd32x4:
sewardja1d93302004-12-12 16:45:06 +00003248 return binary32Ix4(mce, vatom1, vatom2);
3249
3250 case Iop_Sub64x2:
3251 case Iop_Add64x2:
carll62770672013-10-01 15:50:09 +00003252 case Iop_Max64Sx2:
3253 case Iop_Max64Ux2:
3254 case Iop_Min64Sx2:
3255 case Iop_Min64Ux2:
sewardj9a2afe92011-10-19 15:24:55 +00003256 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003257 case Iop_CmpGT64Sx2:
carll62770672013-10-01 15:50:09 +00003258 case Iop_CmpGT64Ux2:
sewardj57f92b02010-08-22 11:54:14 +00003259 case Iop_QSal64x2:
3260 case Iop_QShl64x2:
3261 case Iop_QAdd64Ux2:
3262 case Iop_QAdd64Sx2:
3263 case Iop_QSub64Ux2:
3264 case Iop_QSub64Sx2:
carll24e40de2013-10-15 18:13:21 +00003265 case Iop_PolynomialMulAdd64x2:
3266 case Iop_CipherV128:
3267 case Iop_CipherLV128:
3268 case Iop_NCipherV128:
3269 case Iop_NCipherLV128:
3270 return binary64Ix2(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003271
carll62770672013-10-01 15:50:09 +00003272 case Iop_QNarrowBin64Sto32Sx4:
3273 case Iop_QNarrowBin64Uto32Ux4:
sewardj7ee7d852011-06-16 11:37:21 +00003274 case Iop_QNarrowBin32Sto16Sx8:
3275 case Iop_QNarrowBin32Uto16Ux8:
3276 case Iop_QNarrowBin32Sto16Ux8:
3277 case Iop_QNarrowBin16Sto8Sx16:
3278 case Iop_QNarrowBin16Uto8Ux16:
3279 case Iop_QNarrowBin16Sto8Ux16:
3280 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003281
sewardj0b070592004-12-10 21:44:22 +00003282 case Iop_Min64Fx2:
3283 case Iop_Max64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003284 case Iop_CmpLT64Fx2:
3285 case Iop_CmpLE64Fx2:
3286 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003287 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003288 return binary64Fx2(mce, vatom1, vatom2);
3289
3290 case Iop_Sub64F0x2:
3291 case Iop_Mul64F0x2:
3292 case Iop_Min64F0x2:
3293 case Iop_Max64F0x2:
3294 case Iop_Div64F0x2:
3295 case Iop_CmpLT64F0x2:
3296 case Iop_CmpLE64F0x2:
3297 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003298 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003299 case Iop_Add64F0x2:
3300 return binary64F0x2(mce, vatom1, vatom2);
3301
sewardj170ee212004-12-10 18:57:51 +00003302 case Iop_Min32Fx4:
3303 case Iop_Max32Fx4:
sewardj170ee212004-12-10 18:57:51 +00003304 case Iop_CmpLT32Fx4:
3305 case Iop_CmpLE32Fx4:
3306 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003307 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003308 case Iop_CmpGT32Fx4:
3309 case Iop_CmpGE32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00003310 case Iop_Recps32Fx4:
3311 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003312 return binary32Fx4(mce, vatom1, vatom2);
3313
sewardj57f92b02010-08-22 11:54:14 +00003314 case Iop_Sub32Fx2:
3315 case Iop_Mul32Fx2:
3316 case Iop_Min32Fx2:
3317 case Iop_Max32Fx2:
3318 case Iop_CmpEQ32Fx2:
3319 case Iop_CmpGT32Fx2:
3320 case Iop_CmpGE32Fx2:
3321 case Iop_Add32Fx2:
3322 case Iop_Recps32Fx2:
3323 case Iop_Rsqrts32Fx2:
3324 return binary32Fx2(mce, vatom1, vatom2);
3325
sewardj170ee212004-12-10 18:57:51 +00003326 case Iop_Sub32F0x4:
3327 case Iop_Mul32F0x4:
3328 case Iop_Min32F0x4:
3329 case Iop_Max32F0x4:
3330 case Iop_Div32F0x4:
3331 case Iop_CmpLT32F0x4:
3332 case Iop_CmpLE32F0x4:
3333 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003334 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003335 case Iop_Add32F0x4:
3336 return binary32F0x4(mce, vatom1, vatom2);
3337
sewardj57f92b02010-08-22 11:54:14 +00003338 case Iop_QShlN8Sx16:
3339 case Iop_QShlN8x16:
3340 case Iop_QSalN8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003341 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003342 return mkPCast8x16(mce, vatom1);
3343
3344 case Iop_QShlN16Sx8:
3345 case Iop_QShlN16x8:
3346 case Iop_QSalN16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003347 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003348 return mkPCast16x8(mce, vatom1);
3349
3350 case Iop_QShlN32Sx4:
3351 case Iop_QShlN32x4:
3352 case Iop_QSalN32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003353 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003354 return mkPCast32x4(mce, vatom1);
3355
3356 case Iop_QShlN64Sx2:
3357 case Iop_QShlN64x2:
3358 case Iop_QSalN64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003359 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003360 return mkPCast32x4(mce, vatom1);
3361
3362 case Iop_Mull32Sx2:
3363 case Iop_Mull32Ux2:
3364 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003365 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3366 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003367
3368 case Iop_Mull16Sx4:
3369 case Iop_Mull16Ux4:
3370 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003371 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3372 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003373
3374 case Iop_Mull8Sx8:
3375 case Iop_Mull8Ux8:
3376 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003377 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3378 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003379
3380 case Iop_PwAdd32x4:
3381 return mkPCast32x4(mce,
3382 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3383 mkPCast32x4(mce, vatom2))));
3384
3385 case Iop_PwAdd16x8:
3386 return mkPCast16x8(mce,
3387 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3388 mkPCast16x8(mce, vatom2))));
3389
3390 case Iop_PwAdd8x16:
3391 return mkPCast8x16(mce,
3392 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3393 mkPCast8x16(mce, vatom2))));
3394
sewardj20d38f22005-02-07 23:50:18 +00003395 /* V128-bit data-steering */
3396 case Iop_SetV128lo32:
3397 case Iop_SetV128lo64:
3398 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003399 case Iop_InterleaveLO64x2:
3400 case Iop_InterleaveLO32x4:
3401 case Iop_InterleaveLO16x8:
3402 case Iop_InterleaveLO8x16:
3403 case Iop_InterleaveHI64x2:
3404 case Iop_InterleaveHI32x4:
3405 case Iop_InterleaveHI16x8:
3406 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003407 case Iop_CatOddLanes8x16:
3408 case Iop_CatOddLanes16x8:
3409 case Iop_CatOddLanes32x4:
3410 case Iop_CatEvenLanes8x16:
3411 case Iop_CatEvenLanes16x8:
3412 case Iop_CatEvenLanes32x4:
3413 case Iop_InterleaveOddLanes8x16:
3414 case Iop_InterleaveOddLanes16x8:
3415 case Iop_InterleaveOddLanes32x4:
3416 case Iop_InterleaveEvenLanes8x16:
3417 case Iop_InterleaveEvenLanes16x8:
3418 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003419 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003420
3421 case Iop_GetElem8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003422 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003423 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3424 case Iop_GetElem16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003425 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003426 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3427 case Iop_GetElem32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003428 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003429 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3430 case Iop_GetElem64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003431 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003432 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3433
sewardj620eb5b2005-10-22 12:50:43 +00003434 /* Perm8x16: rearrange values in left arg using steering values
3435 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003436 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003437 case Iop_Perm8x16:
3438 return mkUifUV128(
3439 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003440 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003441 mkPCast8x16(mce, vatom2)
3442 );
sewardj350e8f72012-06-25 07:52:15 +00003443 case Iop_Perm32x4:
3444 return mkUifUV128(
3445 mce,
3446 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3447 mkPCast32x4(mce, vatom2)
3448 );
sewardj170ee212004-12-10 18:57:51 +00003449
sewardj43d60752005-11-10 18:13:01 +00003450 /* These two take the lower half of each 16-bit lane, sign/zero
3451 extend it to 32, and multiply together, producing a 32x4
3452 result (and implicitly ignoring half the operand bits). So
3453 treat it as a bunch of independent 16x8 operations, but then
3454 do 32-bit shifts left-right to copy the lower half results
3455 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3456 into the upper half of each result lane. */
3457 case Iop_MullEven16Ux8:
3458 case Iop_MullEven16Sx8: {
3459 IRAtom* at;
3460 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003461 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3462 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003463 return at;
3464 }
3465
3466 /* Same deal as Iop_MullEven16{S,U}x8 */
3467 case Iop_MullEven8Ux16:
3468 case Iop_MullEven8Sx16: {
3469 IRAtom* at;
3470 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003471 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3472 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003473 return at;
3474 }
3475
carll62770672013-10-01 15:50:09 +00003476 /* Same deal as Iop_MullEven16{S,U}x8 */
3477 case Iop_MullEven32Ux4:
3478 case Iop_MullEven32Sx4: {
3479 IRAtom* at;
3480 at = binary32Ix4(mce,vatom1,vatom2);
3481 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3482 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3483 return at;
3484 }
3485
sewardj43d60752005-11-10 18:13:01 +00003486 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3487 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3488 Simply apply same op to the V bits, since this really no more
3489 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003490 case Iop_NarrowBin32to16x8:
3491 case Iop_NarrowBin16to8x16:
carlldfbf2942013-08-12 18:04:22 +00003492 case Iop_NarrowBin64to32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003493 return assignNew('V', mce, Ity_V128,
3494 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003495
3496 case Iop_ShrV128:
3497 case Iop_ShlV128:
3498 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3499 this is wrong now, scalar shifts are done properly lazily.
3500 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003501 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003502 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003503
carll24e40de2013-10-15 18:13:21 +00003504 /* SHA Iops */
3505 case Iop_SHA256:
3506 case Iop_SHA512:
3507 complainIfUndefined(mce, atom2, NULL);
3508 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3509
sewardj69a13322005-04-23 01:14:51 +00003510 /* I128-bit data-steering */
3511 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003512 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003513
sewardj350e8f72012-06-25 07:52:15 +00003514 /* V256-bit SIMD */
3515
sewardj350e8f72012-06-25 07:52:15 +00003516 case Iop_Max64Fx4:
3517 case Iop_Min64Fx4:
3518 return binary64Fx4(mce, vatom1, vatom2);
3519
sewardj350e8f72012-06-25 07:52:15 +00003520 case Iop_Max32Fx8:
3521 case Iop_Min32Fx8:
3522 return binary32Fx8(mce, vatom1, vatom2);
3523
3524 /* V256-bit data-steering */
3525 case Iop_V128HLtoV256:
3526 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3527
sewardj3245c912004-12-10 14:58:26 +00003528 /* Scalar floating point */
3529
sewardjb5b87402011-03-07 16:05:35 +00003530 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003531 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003532 /* I32(rm) x F32 -> I64 */
3533 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3534
3535 case Iop_I64StoF32:
3536 /* I32(rm) x I64 -> F32 */
3537 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3538
sewardjed69fdb2006-02-03 16:12:27 +00003539 case Iop_RoundF64toInt:
3540 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003541 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003542 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003543 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003544 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003545 case Iop_SinF64:
3546 case Iop_CosF64:
3547 case Iop_TanF64:
3548 case Iop_2xm1F64:
3549 case Iop_SqrtF64:
3550 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003551 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3552
sewardjea8b02f2012-04-12 17:28:57 +00003553 case Iop_ShlD64:
3554 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003555 case Iop_RoundD64toInt:
florian054684f2013-06-06 21:21:46 +00003556 /* I32(rm) x D64 -> D64 */
sewardjea8b02f2012-04-12 17:28:57 +00003557 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3558
3559 case Iop_ShlD128:
3560 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003561 case Iop_RoundD128toInt:
florian054684f2013-06-06 21:21:46 +00003562 /* I32(rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003563 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3564
3565 case Iop_D64toI64S:
florian53eb2a02013-01-12 22:04:00 +00003566 case Iop_D64toI64U:
sewardjea8b02f2012-04-12 17:28:57 +00003567 case Iop_I64StoD64:
florian53eb2a02013-01-12 22:04:00 +00003568 case Iop_I64UtoD64:
florian054684f2013-06-06 21:21:46 +00003569 /* I32(rm) x I64/D64 -> D64/I64 */
sewardjea8b02f2012-04-12 17:28:57 +00003570 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3571
florianba5693c2013-06-17 19:04:24 +00003572 case Iop_F32toD32:
3573 case Iop_F64toD32:
3574 case Iop_F128toD32:
3575 case Iop_D32toF32:
3576 case Iop_D64toF32:
3577 case Iop_D128toF32:
3578 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3579 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3580
3581 case Iop_F32toD64:
florian39b08d82013-05-05 15:05:42 +00003582 case Iop_F64toD64:
florianba5693c2013-06-17 19:04:24 +00003583 case Iop_F128toD64:
3584 case Iop_D32toF64:
florian39b08d82013-05-05 15:05:42 +00003585 case Iop_D64toF64:
florian39b08d82013-05-05 15:05:42 +00003586 case Iop_D128toF64:
florianba5693c2013-06-17 19:04:24 +00003587 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
florian39b08d82013-05-05 15:05:42 +00003588 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3589
florianba5693c2013-06-17 19:04:24 +00003590 case Iop_F32toD128:
3591 case Iop_F64toD128:
florian39b08d82013-05-05 15:05:42 +00003592 case Iop_F128toD128:
florianba5693c2013-06-17 19:04:24 +00003593 case Iop_D32toF128:
3594 case Iop_D64toF128:
florian39b08d82013-05-05 15:05:42 +00003595 case Iop_D128toF128:
florianba5693c2013-06-17 19:04:24 +00003596 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
florian39b08d82013-05-05 15:05:42 +00003597 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3598
sewardjd376a762010-06-27 09:08:54 +00003599 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003600 case Iop_SqrtF32:
3601 /* I32(rm) x I32/F32 -> I32/F32 */
3602 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3603
sewardjb5b87402011-03-07 16:05:35 +00003604 case Iop_SqrtF128:
3605 /* I32(rm) x F128 -> F128 */
3606 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3607
3608 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003609 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003610 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003611 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003612 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3613 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3614
3615 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003616 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003617 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
florian733b4db2013-06-06 19:13:29 +00003618 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3619 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003620 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3621
3622 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003623 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003624 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
florian733b4db2013-06-06 19:13:29 +00003625 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3626 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3627 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003628 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3629
3630 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003631 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003632 return assignNew('V', mce, Ity_I128,
3633 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003634
sewardj59570ff2010-01-01 11:59:33 +00003635 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003636 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003637 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003638 case Iop_I64UtoF32:
florian53eb2a02013-01-12 22:04:00 +00003639 case Iop_D64toI32U:
3640 case Iop_D64toI32S:
3641 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
sewardj95448072004-11-22 20:19:51 +00003642 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3643
sewardjea8b02f2012-04-12 17:28:57 +00003644 case Iop_D64toD32:
florian054684f2013-06-06 21:21:46 +00003645 /* First arg is I32 (rounding mode), second is D64 (data). */
florianf4bed372012-12-21 04:25:10 +00003646 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
sewardjea8b02f2012-04-12 17:28:57 +00003647
sewardj06f96d02009-12-31 19:24:12 +00003648 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003649 /* First arg is I32 (rounding mode), second is F64 (data). */
3650 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3651
sewardj18c72fa2012-04-23 11:22:05 +00003652 case Iop_InsertExpD64:
3653 /* I64 x I64 -> D64 */
3654 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3655
3656 case Iop_InsertExpD128:
3657 /* I64 x I128 -> D128 */
3658 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3659
sewardjb5b87402011-03-07 16:05:35 +00003660 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003661 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003662 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003663 case Iop_CmpD64:
3664 case Iop_CmpD128:
florian29a36b92012-12-26 17:48:46 +00003665 case Iop_CmpExpD64:
3666 case Iop_CmpExpD128:
sewardj95448072004-11-22 20:19:51 +00003667 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3668
3669 /* non-FP after here */
3670
3671 case Iop_DivModU64to32:
3672 case Iop_DivModS64to32:
3673 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3674
sewardj69a13322005-04-23 01:14:51 +00003675 case Iop_DivModU128to64:
3676 case Iop_DivModS128to64:
3677 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3678
florian537ed2d2012-08-20 16:51:39 +00003679 case Iop_8HLto16:
3680 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003681 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003682 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003683 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003684 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003685
sewardjb5b87402011-03-07 16:05:35 +00003686 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003687 case Iop_MullS64:
3688 case Iop_MullU64: {
3689 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3690 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003691 return assignNew('V', mce, Ity_I128,
3692 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003693 }
3694
sewardj95448072004-11-22 20:19:51 +00003695 case Iop_MullS32:
3696 case Iop_MullU32: {
3697 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3698 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003699 return assignNew('V', mce, Ity_I64,
3700 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003701 }
3702
3703 case Iop_MullS16:
3704 case Iop_MullU16: {
3705 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3706 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003707 return assignNew('V', mce, Ity_I32,
3708 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003709 }
3710
3711 case Iop_MullS8:
3712 case Iop_MullU8: {
3713 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3714 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003715 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003716 }
3717
sewardj5af05062010-10-18 16:31:14 +00003718 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003719 case Iop_DivS32:
3720 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003721 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003722 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003723 case Iop_QAdd32S: /* could probably do better */
3724 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003725 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3726
sewardjb00944a2005-12-23 12:47:16 +00003727 case Iop_DivS64:
3728 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003729 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003730 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003731 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3732
sewardj95448072004-11-22 20:19:51 +00003733 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003734 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003735 return expensiveAddSub(mce,True,Ity_I32,
3736 vatom1,vatom2, atom1,atom2);
3737 else
3738 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003739 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003740 if (mce->bogusLiterals)
3741 return expensiveAddSub(mce,False,Ity_I32,
3742 vatom1,vatom2, atom1,atom2);
3743 else
3744 goto cheap_AddSub32;
3745
3746 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003747 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003748 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3749
sewardj463b3d92005-07-18 11:41:15 +00003750 case Iop_CmpORD32S:
3751 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003752 case Iop_CmpORD64S:
3753 case Iop_CmpORD64U:
3754 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003755
sewardj681be302005-01-15 20:43:58 +00003756 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003757 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003758 return expensiveAddSub(mce,True,Ity_I64,
3759 vatom1,vatom2, atom1,atom2);
3760 else
3761 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003762 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003763 if (mce->bogusLiterals)
3764 return expensiveAddSub(mce,False,Ity_I64,
3765 vatom1,vatom2, atom1,atom2);
3766 else
3767 goto cheap_AddSub64;
3768
3769 cheap_AddSub64:
3770 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003771 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3772
sewardj95448072004-11-22 20:19:51 +00003773 case Iop_Mul16:
3774 case Iop_Add16:
3775 case Iop_Sub16:
3776 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3777
florian537ed2d2012-08-20 16:51:39 +00003778 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003779 case Iop_Sub8:
3780 case Iop_Add8:
3781 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3782
sewardj69a13322005-04-23 01:14:51 +00003783 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003784 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003785 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003786 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003787 else
3788 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003789
3790 expensive_cmp64:
3791 case Iop_ExpCmpNE64:
3792 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3793
sewardj69a13322005-04-23 01:14:51 +00003794 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003795 case Iop_CmpLE64S: case Iop_CmpLE64U:
3796 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003797 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3798
sewardjd5204dc2004-12-31 01:16:11 +00003799 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003800 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003801 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003802 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003803 else
3804 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003805
3806 expensive_cmp32:
3807 case Iop_ExpCmpNE32:
3808 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3809
sewardjd5204dc2004-12-31 01:16:11 +00003810 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003811 case Iop_CmpLE32S: case Iop_CmpLE32U:
3812 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003813 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3814
3815 case Iop_CmpEQ16: case Iop_CmpNE16:
3816 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3817
sewardj4cfa81b2012-11-08 10:58:16 +00003818 case Iop_ExpCmpNE16:
3819 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3820
sewardj95448072004-11-22 20:19:51 +00003821 case Iop_CmpEQ8: case Iop_CmpNE8:
3822 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3823
sewardjafed4c52009-07-12 13:00:17 +00003824 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3825 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3826 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3827 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3828 /* Just say these all produce a defined result, regardless
3829 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3830 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3831
sewardjaaddbc22005-10-07 09:49:53 +00003832 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3833 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3834
sewardj95448072004-11-22 20:19:51 +00003835 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003836 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003837
sewardjdb67f5f2004-12-14 01:15:31 +00003838 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003839 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003840
florian537ed2d2012-08-20 16:51:39 +00003841 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003842 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003843
sewardj350e8f72012-06-25 07:52:15 +00003844 case Iop_AndV256:
3845 uifu = mkUifUV256; difd = mkDifDV256;
3846 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003847 case Iop_AndV128:
3848 uifu = mkUifUV128; difd = mkDifDV128;
3849 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003850 case Iop_And64:
3851 uifu = mkUifU64; difd = mkDifD64;
3852 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003853 case Iop_And32:
3854 uifu = mkUifU32; difd = mkDifD32;
3855 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3856 case Iop_And16:
3857 uifu = mkUifU16; difd = mkDifD16;
3858 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3859 case Iop_And8:
3860 uifu = mkUifU8; difd = mkDifD8;
3861 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3862
sewardj350e8f72012-06-25 07:52:15 +00003863 case Iop_OrV256:
3864 uifu = mkUifUV256; difd = mkDifDV256;
3865 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003866 case Iop_OrV128:
3867 uifu = mkUifUV128; difd = mkDifDV128;
3868 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003869 case Iop_Or64:
3870 uifu = mkUifU64; difd = mkDifD64;
3871 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003872 case Iop_Or32:
3873 uifu = mkUifU32; difd = mkDifD32;
3874 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3875 case Iop_Or16:
3876 uifu = mkUifU16; difd = mkDifD16;
3877 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3878 case Iop_Or8:
3879 uifu = mkUifU8; difd = mkDifD8;
3880 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3881
3882 do_And_Or:
3883 return
3884 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003885 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003886 and_or_ty,
3887 difd(mce, uifu(mce, vatom1, vatom2),
3888 difd(mce, improve(mce, atom1, vatom1),
3889 improve(mce, atom2, vatom2) ) ) );
3890
3891 case Iop_Xor8:
3892 return mkUifU8(mce, vatom1, vatom2);
3893 case Iop_Xor16:
3894 return mkUifU16(mce, vatom1, vatom2);
3895 case Iop_Xor32:
3896 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003897 case Iop_Xor64:
3898 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003899 case Iop_XorV128:
3900 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003901 case Iop_XorV256:
3902 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003903
sewardja2f30952013-03-27 11:40:02 +00003904 /* V256-bit SIMD */
3905
3906 case Iop_ShrN16x16:
3907 case Iop_ShrN32x8:
3908 case Iop_ShrN64x4:
3909 case Iop_SarN16x16:
3910 case Iop_SarN32x8:
3911 case Iop_ShlN16x16:
3912 case Iop_ShlN32x8:
3913 case Iop_ShlN64x4:
3914 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3915 this is wrong now, scalar shifts are done properly lazily.
3916 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003917 complainIfUndefined(mce, atom2, NULL);
sewardja2f30952013-03-27 11:40:02 +00003918 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
3919
3920 case Iop_QSub8Ux32:
3921 case Iop_QSub8Sx32:
3922 case Iop_Sub8x32:
3923 case Iop_Min8Ux32:
3924 case Iop_Min8Sx32:
3925 case Iop_Max8Ux32:
3926 case Iop_Max8Sx32:
3927 case Iop_CmpGT8Sx32:
3928 case Iop_CmpEQ8x32:
3929 case Iop_Avg8Ux32:
3930 case Iop_QAdd8Ux32:
3931 case Iop_QAdd8Sx32:
3932 case Iop_Add8x32:
3933 return binary8Ix32(mce, vatom1, vatom2);
3934
3935 case Iop_QSub16Ux16:
3936 case Iop_QSub16Sx16:
3937 case Iop_Sub16x16:
3938 case Iop_Mul16x16:
3939 case Iop_MulHi16Sx16:
3940 case Iop_MulHi16Ux16:
3941 case Iop_Min16Sx16:
3942 case Iop_Min16Ux16:
3943 case Iop_Max16Sx16:
3944 case Iop_Max16Ux16:
3945 case Iop_CmpGT16Sx16:
3946 case Iop_CmpEQ16x16:
3947 case Iop_Avg16Ux16:
3948 case Iop_QAdd16Ux16:
3949 case Iop_QAdd16Sx16:
3950 case Iop_Add16x16:
3951 return binary16Ix16(mce, vatom1, vatom2);
3952
3953 case Iop_Sub32x8:
3954 case Iop_CmpGT32Sx8:
3955 case Iop_CmpEQ32x8:
3956 case Iop_Add32x8:
3957 case Iop_Max32Ux8:
3958 case Iop_Max32Sx8:
3959 case Iop_Min32Ux8:
3960 case Iop_Min32Sx8:
3961 case Iop_Mul32x8:
3962 return binary32Ix8(mce, vatom1, vatom2);
3963
3964 case Iop_Sub64x4:
3965 case Iop_Add64x4:
3966 case Iop_CmpEQ64x4:
3967 case Iop_CmpGT64Sx4:
3968 return binary64Ix4(mce, vatom1, vatom2);
3969
3970 /* Perm32x8: rearrange values in left arg using steering values
3971 from right arg. So rearrange the vbits in the same way but
3972 pessimise wrt steering values. */
3973 case Iop_Perm32x8:
3974 return mkUifUV256(
3975 mce,
3976 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
3977 mkPCast32x8(mce, vatom2)
3978 );
3979
njn25e49d8e72002-09-23 09:36:25 +00003980 default:
sewardj95448072004-11-22 20:19:51 +00003981 ppIROp(op);
3982 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003983 }
njn25e49d8e72002-09-23 09:36:25 +00003984}
3985
njn25e49d8e72002-09-23 09:36:25 +00003986
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* Compute the shadow (V-bit) expression for the unary operation
      'op' applied to 'atom'.  The general schemes used below are:

        - unary*Fx*/unary*F0x*: lane-wise pessimising casts for SIMD
          floating-point ops (defined lanes stay defined, any undefined
          bit poisons its whole lane);

        - assignNew(..., unop(op, vatom)): ops which merely move or
          duplicate bits (widening, narrowing, reversal, Dup) can apply
          the same op directly to the V bits;

        - mkPCastTo(mce, ty, vatom): scalar pessimising cast -- if any
          input bit is undefined, the entire result is undefined;

        - mkPCast*x*: per-lane pessimising cast for SIMD integer ops.

      For the widening operations {8,16,32}{U,S}to{16,32,64}, the
      selection of shadow operation implicitly duplicates the logic in
      do_shadow_LoadG and should be kept in sync (in the very unlikely
      event that the interpretation of such widening ops changes in
      future). See comment in do_shadow_LoadG. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx8:
      case Iop_RSqrt32Fx8:
      case Iop_Recip32Fx8:
         return unary32Fx8(mce, vatom);

      case Iop_Sqrt64Fx4:
         return unary64Fx4(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* These just move/duplicate bits: apply the op itself to the
         V bits and the result V bits line up with the result data. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* F128/D128 halves: the shadow of a 128-bit value is an I128,
         so select the matching I64 half of the shadow. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Conversions into 128-bit FP/DFP: pessimise to all-defined or
         all-undefined at I128. */
      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
      case Iop_I32UtoD128: /* unsigned I32 -> D128 */
      case Iop_I64UtoD128: /* unsigned I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Ops producing a 64-bit result: pessimise to I64. */
      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_I32StoD64:
      case Iop_I32UtoD64:
      case Iop_ExtractExpD64:  /* D64  -> I64 */
      case Iop_ExtractExpD128: /* D128 -> I64 */
      case Iop_ExtractSigD64:  /* D64  -> I64 */
      case Iop_ExtractSigD128: /* D128 -> I64 */
      case Iop_DPBtoBCD:
      case Iop_BCDtoDPB:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Ops producing a 32-bit result: pessimise to I32. */
      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Count-trailing-zeroes gets a precise (expensive) scheme,
         since only the bits at and below the lowest set bit matter. */
      case Iop_Ctz32:
      case Iop_Ctz64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      /* Widening/narrowing/permuting ops yielding I64: apply the op
         itself to the V bits. */
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise NOTs leave definedness of each
         bit unchanged, so the input shadow passes straight through. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:
      case Iop_NotV256:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Per-lane pessimising casts for SIMD integer ops. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
      case Iop_CipherSV128:
      case Iop_Clz64x2:
         return mkPCast64x2(mce, vatom);

      case Iop_PwBitMtxXpose64x2:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise-add-long ops: pessimise each input lane first, then
         apply the op, so that an undefined input lane makes the
         corresponding (wider) output lane undefined. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      /* NB: Iop_I64UtoF32 deliberately falls into the panic below --
         it is handled as a Binop (with rounding mode) elsewhere; as a
         Unop it is unexpected here. */
      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
4304
4305
sewardjb9e6d242013-05-11 13:42:08 +00004306/* Worker function -- do not call directly. See comments on
4307 expr2vbits_Load for the meaning of |guard|.
4308
4309 Generates IR to (1) perform a definedness test of |addr|, (2)
4310 perform a validity test of |addr|, and (3) return the Vbits for the
4311 location indicated by |addr|. All of this only happens when
4312 |guard| is NULL or |guard| evaluates to True at run time.
4313
4314 If |guard| evaluates to False at run time, the returned value is
4315 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4316 performed.
4317
4318 The definedness of |guard| itself is not checked. That is assumed
4319 to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address. This also sets
      the address (shadow) to 'defined' following the test.
      NB: this must be emitted before the shadow-load call below, so
      that an undefined address is reported before it is used. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   /* Select the endianness- and size-specific LOADV helper.  V128 and
      V256 results do not fit in a register, so those helpers return
      their result via a vector out-parameter (ret_via_outparam). */
   void* helper = NULL;
   const HChar* hname = NULL;
   Bool ret_via_outparam = False;

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256le);
                        hname = "MC_(helperc_LOADV256le)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128le);
                        hname = "MC_(helperc_LOADV128le)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
                        hname = "MC_(helperc_LOADV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
                        hname = "MC_(helperc_LOADV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
                        hname = "MC_(helperc_LOADV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256be);
                        hname = "MC_(helperc_LOADV256be)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128be);
                        hname = "MC_(helperc_LOADV128be)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
                        hname = "MC_(helperc_LOADV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
                        hname = "MC_(helperc_LOADV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
                        hname = "MC_(helperc_LOADV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   tl_assert(helper);
   tl_assert(hname);

   /* Generate the actual address into addrAct.  A nonzero |bias| is
      folded into the address with a host-word-sized add. */
   IRAtom* addrAct;
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp mkAdd;
      IRAtom* eBias;
      IRType tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);

   /* Here's the call. */
   IRDirty* di;
   if (ret_via_outparam) {
      /* Vector result: first arg slot is the VECRET out-parameter. */
      di = unsafeIRDirty_1_N( datavbits,
                              2/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
   } else {
      di = unsafeIRDirty_1_N( datavbits,
                              1/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_1( addrAct ) );
   }

   setHelperAnns( mce, di );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be all-ones
         (all-undefined), so it'd be obvious if it got used
         inadvertently. We can get by with the IR-mandated default
         value (0b01 repeating, 0x55 etc) as that'll still look pretty
         undefined if it ever leaks out. */
   }
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
4440
4441
sewardjcafe5052013-01-17 14:24:35 +00004442/* Generate IR to do a shadow load. The helper is expected to check
4443 the validity of the address and return the V bits for that address.
4444 This can optionally be controlled by a guard, which is assumed to
4445 be True if NULL. In the case where the guard is False at runtime,
sewardjb9e6d242013-05-11 13:42:08 +00004446 the helper will return the didn't-do-the-call value of 0x55..55.
4447 Since that means "completely undefined result", the caller of
sewardjcafe5052013-01-17 14:24:35 +00004448 this function will need to fix up the result somehow in that
4449 case.
sewardjb9e6d242013-05-11 13:42:08 +00004450
4451 Caller of this function is also expected to have checked the
4452 definedness of |guard| before this point.
sewardjcafe5052013-01-17 14:24:35 +00004453*/
sewardj95448072004-11-22 20:19:51 +00004454static
sewardj67564542013-08-16 08:31:29 +00004455IRAtom* expr2vbits_Load ( MCEnv* mce,
4456 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004457 IRAtom* addr, UInt bias,
4458 IRAtom* guard )
sewardj170ee212004-12-10 18:57:51 +00004459{
sewardj2e595852005-06-30 23:33:37 +00004460 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00004461 switch (shadowTypeV(ty)) {
sewardj67564542013-08-16 08:31:29 +00004462 case Ity_I8:
4463 case Ity_I16:
4464 case Ity_I32:
sewardj170ee212004-12-10 18:57:51 +00004465 case Ity_I64:
sewardj21a5f8c2013-08-08 10:41:46 +00004466 case Ity_V128:
sewardj67564542013-08-16 08:31:29 +00004467 case Ity_V256:
sewardjcafe5052013-01-17 14:24:35 +00004468 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
sewardj170ee212004-12-10 18:57:51 +00004469 default:
sewardj2e595852005-06-30 23:33:37 +00004470 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00004471 }
4472}
4473
4474
sewardjcafe5052013-01-17 14:24:35 +00004475/* The most general handler for guarded loads. Assumes the
sewardjb9e6d242013-05-11 13:42:08 +00004476 definedness of GUARD has already been checked by the caller. A
4477 GUARD of NULL is assumed to mean "always True". Generates code to
4478 check the definedness and validity of ADDR.
sewardjcafe5052013-01-17 14:24:35 +00004479
4480 Generate IR to do a shadow load from ADDR and return the V bits.
4481 The loaded type is TY. The loaded data is then (shadow) widened by
4482 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4483 evaluates to False at run time then the returned Vbits are simply
4484 VALT instead. Note therefore that the argument type of VWIDEN must
4485 be TY and the result type of VWIDEN must equal the type of VALT.
4486*/
florian434ffae2012-07-19 17:23:42 +00004487static
sewardjcafe5052013-01-17 14:24:35 +00004488IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4489 IREndness end, IRType ty,
4490 IRAtom* addr, UInt bias,
4491 IRAtom* guard,
4492 IROp vwiden, IRAtom* valt )
florian434ffae2012-07-19 17:23:42 +00004493{
sewardjcafe5052013-01-17 14:24:35 +00004494 /* Sanity check the conversion operation, and also set TYWIDE. */
4495 IRType tyWide = Ity_INVALID;
4496 switch (vwiden) {
4497 case Iop_INVALID:
4498 tyWide = ty;
4499 break;
4500 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4501 tyWide = Ity_I32;
4502 break;
4503 default:
4504 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
florian434ffae2012-07-19 17:23:42 +00004505 }
4506
sewardjcafe5052013-01-17 14:24:35 +00004507 /* If the guard evaluates to True, this will hold the loaded V bits
4508 at TY. If the guard evaluates to False, this will be all
4509 ones, meaning "all undefined", in which case we will have to
florian5686b2d2013-01-29 03:57:40 +00004510 replace it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00004511 IRAtom* iftrue1
4512 = assignNew('V', mce, ty,
4513 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4514 /* Now (shadow-) widen the loaded V bits to the desired width. In
4515 the guard-is-False case, the allowable widening operators will
4516 in the worst case (unsigned widening) at least leave the
4517 pre-widened part as being marked all-undefined, and in the best
4518 case (signed widening) mark the whole widened result as
4519 undefined. Anyway, it doesn't matter really, since in this case
florian5686b2d2013-01-29 03:57:40 +00004520 we will replace said value with the default value |valt| using an
4521 ITE. */
sewardjcafe5052013-01-17 14:24:35 +00004522 IRAtom* iftrue2
4523 = vwiden == Iop_INVALID
4524 ? iftrue1
4525 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4526 /* These are the V bits we will return if the load doesn't take
4527 place. */
4528 IRAtom* iffalse
4529 = valt;
florian5686b2d2013-01-29 03:57:40 +00004530 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00004531 something that iropt knows how to fold out later. */
4532 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00004533 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00004534 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00004535 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00004536}
4537
4538
4539/* A simpler handler for guarded loads, in which there is no
4540 conversion operation, and the default V bit return (when the guard
4541 evaluates to False at runtime) is "all defined". If there is no
4542 guard expression or the guard is always TRUE this function behaves
sewardjb9e6d242013-05-11 13:42:08 +00004543 like expr2vbits_Load. It is assumed that definedness of GUARD has
4544 already been checked at the call site. */
sewardjcafe5052013-01-17 14:24:35 +00004545static
4546IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4547 IREndness end, IRType ty,
4548 IRAtom* addr, UInt bias,
4549 IRAtom *guard )
4550{
4551 return expr2vbits_Load_guarded_General(
4552 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4553 );
florian434ffae2012-07-19 17:23:42 +00004554}
4555
4556
sewardj170ee212004-12-10 18:57:51 +00004557static
florian5686b2d2013-01-29 03:57:40 +00004558IRAtom* expr2vbits_ITE ( MCEnv* mce,
4559 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
sewardj95448072004-11-22 20:19:51 +00004560{
florian5686b2d2013-01-29 03:57:40 +00004561 IRAtom *vbitsC, *vbits0, *vbits1;
sewardj95448072004-11-22 20:19:51 +00004562 IRType ty;
sewardj07bfda22013-01-29 21:11:55 +00004563 /* Given ITE(cond, iftrue, iffalse), generate
4564 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
sewardj95448072004-11-22 20:19:51 +00004565 That is, steer the V bits like the originals, but trash the
4566 result if the steering value is undefined. This gives
4567 lazy propagation. */
4568 tl_assert(isOriginalAtom(mce, cond));
florian5686b2d2013-01-29 03:57:40 +00004569 tl_assert(isOriginalAtom(mce, iftrue));
4570 tl_assert(isOriginalAtom(mce, iffalse));
sewardj95448072004-11-22 20:19:51 +00004571
4572 vbitsC = expr2vbits(mce, cond);
florian5686b2d2013-01-29 03:57:40 +00004573 vbits1 = expr2vbits(mce, iftrue);
sewardj07bfda22013-01-29 21:11:55 +00004574 vbits0 = expr2vbits(mce, iffalse);
sewardj1c0ce7a2009-07-01 08:10:49 +00004575 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004576
4577 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004578 mkUifU(mce, ty, assignNew('V', mce, ty,
florian5686b2d2013-01-29 03:57:40 +00004579 IRExpr_ITE(cond, vbits1, vbits0)),
sewardj95448072004-11-22 20:19:51 +00004580 mkPCastTo(mce, ty, vbitsC) );
4581}
4582
4583/* --------- This is the main expression-handling function. --------- */
4584
4585static
4586IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4587{
4588 switch (e->tag) {
4589
4590 case Iex_Get:
4591 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4592
4593 case Iex_GetI:
4594 return shadow_GETI( mce, e->Iex.GetI.descr,
4595 e->Iex.GetI.ix, e->Iex.GetI.bias );
4596
sewardj0b9d74a2006-12-24 02:24:11 +00004597 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004598 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004599
4600 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004601 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004602
sewardje91cea72006-02-08 19:32:02 +00004603 case Iex_Qop:
4604 return expr2vbits_Qop(
4605 mce,
floriane2ab2972012-06-01 20:43:03 +00004606 e->Iex.Qop.details->op,
4607 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4608 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004609 );
4610
sewardjed69fdb2006-02-03 16:12:27 +00004611 case Iex_Triop:
4612 return expr2vbits_Triop(
4613 mce,
florian26441742012-06-02 20:30:41 +00004614 e->Iex.Triop.details->op,
4615 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4616 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004617 );
4618
sewardj95448072004-11-22 20:19:51 +00004619 case Iex_Binop:
4620 return expr2vbits_Binop(
4621 mce,
4622 e->Iex.Binop.op,
4623 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4624 );
4625
4626 case Iex_Unop:
4627 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4628
sewardj2e595852005-06-30 23:33:37 +00004629 case Iex_Load:
4630 return expr2vbits_Load( mce, e->Iex.Load.end,
4631 e->Iex.Load.ty,
sewardjcafe5052013-01-17 14:24:35 +00004632 e->Iex.Load.addr, 0/*addr bias*/,
4633 NULL/* guard == "always True"*/ );
sewardj95448072004-11-22 20:19:51 +00004634
4635 case Iex_CCall:
4636 return mkLazyN( mce, e->Iex.CCall.args,
4637 e->Iex.CCall.retty,
4638 e->Iex.CCall.cee );
4639
florian5686b2d2013-01-29 03:57:40 +00004640 case Iex_ITE:
4641 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
sewardj07bfda22013-01-29 21:11:55 +00004642 e->Iex.ITE.iffalse);
njn25e49d8e72002-09-23 09:36:25 +00004643
4644 default:
sewardj95448072004-11-22 20:19:51 +00004645 VG_(printf)("\n");
4646 ppIRExpr(e);
4647 VG_(printf)("\n");
4648 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004649 }
njn25e49d8e72002-09-23 09:36:25 +00004650}
4651
4652/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004653/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004654/*------------------------------------------------------------*/
4655
sewardj95448072004-11-22 20:19:51 +00004656/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004657
4658static
sewardj95448072004-11-22 20:19:51 +00004659IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004660{
sewardj7cf97ee2004-11-28 14:25:01 +00004661 IRType ty, tyH;
4662
sewardj95448072004-11-22 20:19:51 +00004663 /* vatom is vbits-value and as such can only have a shadow type. */
4664 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004665
sewardj1c0ce7a2009-07-01 08:10:49 +00004666 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004667 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004668
sewardj95448072004-11-22 20:19:51 +00004669 if (tyH == Ity_I32) {
4670 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004671 case Ity_I32:
4672 return vatom;
4673 case Ity_I16:
4674 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4675 case Ity_I8:
4676 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4677 default:
4678 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004679 }
sewardj6cf40ff2005-04-20 22:31:26 +00004680 } else
4681 if (tyH == Ity_I64) {
4682 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004683 case Ity_I32:
4684 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4685 case Ity_I16:
4686 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4687 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4688 case Ity_I8:
4689 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4690 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4691 default:
4692 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004693 }
sewardj95448072004-11-22 20:19:51 +00004694 } else {
4695 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004696 }
sewardj95448072004-11-22 20:19:51 +00004697 unhandled:
4698 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4699 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004700}
4701
njn25e49d8e72002-09-23 09:36:25 +00004702
sewardjcafe5052013-01-17 14:24:35 +00004703/* Generate a shadow store. |addr| is always the original address
4704 atom. You can pass in either originals or V-bits for the data
4705 atom, but obviously not both. This function generates a check for
sewardjb9e6d242013-05-11 13:42:08 +00004706 the definedness and (indirectly) the validity of |addr|, but only
4707 when |guard| evaluates to True at run time (or is NULL).
njn25e49d8e72002-09-23 09:36:25 +00004708
sewardjcafe5052013-01-17 14:24:35 +00004709 |guard| :: Ity_I1 controls whether the store really happens; NULL
4710 means it unconditionally does. Note that |guard| itself is not
4711 checked for definedness; the caller of this function must do that
4712 if necessary.
4713*/
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of |data| (an original atom) and |vdata| (its
      already-computed shadow) must be supplied.  If given the
      original, compute its V bits here.  A nonzero |bias| is only
      meaningful when the caller supplies |vdata| directly. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   /* The store size/type is taken from the shadow value. */
   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory.  Note: the V128/V256 cases reuse the
      64-bit helper, once per 64-bit lane. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      /* Endianness determines which lane lands at which offset. */
      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      /* For each lane: compute the lane address, extract the lane's
         V bits, and build a call to the 64-bit store helper. */
      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      /* All four lane stores are gated on the same guard. */
      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      /* Low 64-bit half: address, lane V bits, helper call. */
      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      /* High 64-bit half, likewise. */
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to a host word so the
            helper's second argument is passable by regparm. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00004964
sewardj95448072004-11-22 20:19:51 +00004965/* Do lazy pessimistic propagation through a dirty helper call, by
4966 looking at the annotations on it. This is the most complex part of
4967 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004968
sewardj95448072004-11-22 20:19:51 +00004969static IRType szToITy ( Int n )
4970{
4971 switch (n) {
4972 case 1: return Ity_I8;
4973 case 2: return Ity_I16;
4974 case 4: return Ity_I32;
4975 case 8: return Ity_I64;
4976 default: VG_(tool_panic)("szToITy(memcheck)");
4977 }
4978}
njn25e49d8e72002-09-23 09:36:25 +00004979
sewardj95448072004-11-22 20:19:51 +00004980static
4981void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4982{
sewardj2eecb742012-06-01 16:11:41 +00004983 Int i, k, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00004984 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00004985 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00004986 IRTemp dst;
4987 IREndness end;
4988
4989 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00004990# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00004991 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00004992# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00004993 end = Iend_LE;
4994# else
4995# error "Unknown endianness"
4996# endif
njn25e49d8e72002-09-23 09:36:25 +00004997
sewardj95448072004-11-22 20:19:51 +00004998 /* First check the guard. */
sewardjb9e6d242013-05-11 13:42:08 +00004999 complainIfUndefined(mce, d->guard, NULL);
sewardj95448072004-11-22 20:19:51 +00005000
5001 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00005002 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00005003
florian434ffae2012-07-19 17:23:42 +00005004 /* Inputs: unmasked args
5005 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj95448072004-11-22 20:19:51 +00005006 for (i = 0; d->args[i]; i++) {
sewardj21a5f8c2013-08-08 10:41:46 +00005007 IRAtom* arg = d->args[i];
5008 if ( (d->cee->mcx_mask & (1<<i))
floriana5c3ecb2013-08-15 20:55:42 +00005009 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
sewardj95448072004-11-22 20:19:51 +00005010 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00005011 } else {
sewardj21a5f8c2013-08-08 10:41:46 +00005012 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
sewardj95448072004-11-22 20:19:51 +00005013 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00005014 }
5015 }
sewardj95448072004-11-22 20:19:51 +00005016
5017 /* Inputs: guest state that we read. */
5018 for (i = 0; i < d->nFxState; i++) {
5019 tl_assert(d->fxState[i].fx != Ifx_None);
5020 if (d->fxState[i].fx == Ifx_Write)
5021 continue;
sewardja7203252004-11-26 19:17:47 +00005022
sewardj2eecb742012-06-01 16:11:41 +00005023 /* Enumerate the described state segments */
5024 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5025 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5026 gSz = d->fxState[i].size;
sewardja7203252004-11-26 19:17:47 +00005027
sewardj2eecb742012-06-01 16:11:41 +00005028 /* Ignore any sections marked as 'always defined'. */
5029 if (isAlwaysDefd(mce, gOff, gSz)) {
5030 if (0)
5031 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5032 gOff, gSz);
5033 continue;
5034 }
sewardje9e16d32004-12-10 13:17:55 +00005035
sewardj2eecb742012-06-01 16:11:41 +00005036 /* This state element is read or modified. So we need to
5037 consider it. If larger than 8 bytes, deal with it in
5038 8-byte chunks. */
5039 while (True) {
5040 tl_assert(gSz >= 0);
5041 if (gSz == 0) break;
5042 n = gSz <= 8 ? gSz : 8;
5043 /* update 'curr' with UifU of the state slice
5044 gOff .. gOff+n-1 */
5045 tySrc = szToITy( n );
florian434ffae2012-07-19 17:23:42 +00005046
5047 /* Observe the guard expression. If it is false use an
5048 all-bits-defined bit pattern */
5049 IRAtom *cond, *iffalse, *iftrue;
5050
sewardjcc961652013-01-26 11:49:15 +00005051 cond = assignNew('V', mce, Ity_I1, d->guard);
florian434ffae2012-07-19 17:23:42 +00005052 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5053 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5054 src = assignNew('V', mce, tySrc,
florian5686b2d2013-01-29 03:57:40 +00005055 IRExpr_ITE(cond, iftrue, iffalse));
florian434ffae2012-07-19 17:23:42 +00005056
sewardj2eecb742012-06-01 16:11:41 +00005057 here = mkPCastTo( mce, Ity_I32, src );
5058 curr = mkUifU32(mce, here, curr);
5059 gSz -= n;
5060 gOff += n;
5061 }
5062 }
sewardj95448072004-11-22 20:19:51 +00005063 }
5064
5065 /* Inputs: memory. First set up some info needed regardless of
5066 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00005067
5068 if (d->mFx != Ifx_None) {
5069 /* Because we may do multiple shadow loads/stores from the same
5070 base address, it's best to do a single test of its
5071 definedness right now. Post-instrumentation optimisation
5072 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00005073 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00005074 tl_assert(d->mAddr);
sewardjb9e6d242013-05-11 13:42:08 +00005075 complainIfUndefined(mce, d->mAddr, d->guard);
sewardj95448072004-11-22 20:19:51 +00005076
sewardj1c0ce7a2009-07-01 08:10:49 +00005077 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00005078 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5079 tl_assert(tyAddr == mce->hWordTy); /* not really right */
5080 }
5081
5082 /* Deal with memory inputs (reads or modifies) */
5083 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00005084 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00005085 /* chew off 32-bit chunks. We don't care about the endianness
5086 since it's all going to be condensed down to a single bit,
5087 but nevertheless choose an endianness which is hopefully
5088 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00005089 while (toDo >= 4) {
5090 here = mkPCastTo(
5091 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005092 expr2vbits_Load_guarded_Simple(
5093 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
sewardj95448072004-11-22 20:19:51 +00005094 );
5095 curr = mkUifU32(mce, here, curr);
5096 toDo -= 4;
5097 }
5098 /* chew off 16-bit chunks */
5099 while (toDo >= 2) {
5100 here = mkPCastTo(
5101 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005102 expr2vbits_Load_guarded_Simple(
5103 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
sewardj95448072004-11-22 20:19:51 +00005104 );
5105 curr = mkUifU32(mce, here, curr);
5106 toDo -= 2;
5107 }
floriancda994b2012-06-08 16:01:19 +00005108 /* chew off the remaining 8-bit chunk, if any */
5109 if (toDo == 1) {
5110 here = mkPCastTo(
5111 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005112 expr2vbits_Load_guarded_Simple(
5113 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
floriancda994b2012-06-08 16:01:19 +00005114 );
5115 curr = mkUifU32(mce, here, curr);
5116 toDo -= 1;
5117 }
5118 tl_assert(toDo == 0);
sewardj95448072004-11-22 20:19:51 +00005119 }
5120
5121 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5122 all the inputs to the helper. Now we need to re-distribute the
5123 results to all destinations. */
5124
5125 /* Outputs: the destination temporary, if there is one. */
5126 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005127 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00005128 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00005129 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00005130 }
5131
5132 /* Outputs: guest state that we write or modify. */
5133 for (i = 0; i < d->nFxState; i++) {
5134 tl_assert(d->fxState[i].fx != Ifx_None);
5135 if (d->fxState[i].fx == Ifx_Read)
5136 continue;
sewardj2eecb742012-06-01 16:11:41 +00005137
5138 /* Enumerate the described state segments */
5139 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5140 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5141 gSz = d->fxState[i].size;
5142
5143 /* Ignore any sections marked as 'always defined'. */
5144 if (isAlwaysDefd(mce, gOff, gSz))
5145 continue;
5146
5147 /* This state element is written or modified. So we need to
5148 consider it. If larger than 8 bytes, deal with it in
5149 8-byte chunks. */
5150 while (True) {
5151 tl_assert(gSz >= 0);
5152 if (gSz == 0) break;
5153 n = gSz <= 8 ? gSz : 8;
5154 /* Write suitably-casted 'curr' to the state slice
5155 gOff .. gOff+n-1 */
5156 tyDst = szToITy( n );
5157 do_shadow_PUT( mce, gOff,
5158 NULL, /* original atom */
florian434ffae2012-07-19 17:23:42 +00005159 mkPCastTo( mce, tyDst, curr ), d->guard );
sewardj2eecb742012-06-01 16:11:41 +00005160 gSz -= n;
5161 gOff += n;
5162 }
sewardje9e16d32004-12-10 13:17:55 +00005163 }
sewardj95448072004-11-22 20:19:51 +00005164 }
5165
sewardj2e595852005-06-30 23:33:37 +00005166 /* Outputs: memory that we write or modify. Same comments about
5167 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00005168 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00005169 toDo = d->mSize;
5170 /* chew off 32-bit chunks */
5171 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00005172 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5173 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00005174 mkPCastTo( mce, Ity_I32, curr ),
florian434ffae2012-07-19 17:23:42 +00005175 d->guard );
sewardj95448072004-11-22 20:19:51 +00005176 toDo -= 4;
5177 }
5178 /* chew off 16-bit chunks */
5179 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00005180 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5181 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00005182 mkPCastTo( mce, Ity_I16, curr ),
florian434ffae2012-07-19 17:23:42 +00005183 d->guard );
sewardj95448072004-11-22 20:19:51 +00005184 toDo -= 2;
5185 }
floriancda994b2012-06-08 16:01:19 +00005186 /* chew off the remaining 8-bit chunk, if any */
5187 if (toDo == 1) {
5188 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5189 NULL, /* original data */
5190 mkPCastTo( mce, Ity_I8, curr ),
florian434ffae2012-07-19 17:23:42 +00005191 d->guard );
floriancda994b2012-06-08 16:01:19 +00005192 toDo -= 1;
5193 }
5194 tl_assert(toDo == 0);
sewardj95448072004-11-22 20:19:51 +00005195 }
5196
njn25e49d8e72002-09-23 09:36:25 +00005197}
5198
sewardj1c0ce7a2009-07-01 08:10:49 +00005199
sewardj826ec492005-05-12 18:05:00 +00005200/* We have an ABI hint telling us that [base .. base+len-1] is to
5201 become undefined ("writable"). Generate code to call a helper to
5202 notify the A/V bit machinery of this fact.
5203
5204 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00005205 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5206 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00005207*/
5208static
sewardj7cf4e6b2008-05-01 20:24:26 +00005209void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00005210{
5211 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00005212 /* Minor optimisation: if not doing origin tracking, ignore the
5213 supplied nia and pass zero instead. This is on the basis that
5214 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5215 almost always generate a shorter instruction to put zero into a
5216 register than any other value. */
5217 if (MC_(clo_mc_level) < 3)
5218 nia = mkIRExpr_HWord(0);
5219
sewardj826ec492005-05-12 18:05:00 +00005220 di = unsafeIRDirty_0_N(
5221 0/*regparms*/,
5222 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00005223 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00005224 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00005225 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005226 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00005227}
5228
njn25e49d8e72002-09-23 09:36:25 +00005229
sewardj1c0ce7a2009-07-01 08:10:49 +00005230/* ------ Dealing with IRCAS (big and complex) ------ */
5231
5232/* FWDS */
5233static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5234 IRAtom* baseaddr, Int offset );
5235static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5236static void gen_store_b ( MCEnv* mce, Int szB,
5237 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5238 IRAtom* guard );
5239
5240static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5241static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5242
5243
5244/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5245 IRExpr.Consts, else this asserts. If they are both Consts, it
5246 doesn't do anything. So that just leaves the RdTmp case.
5247
5248 In which case: this assigns the shadow value SHADOW to the IR
5249 shadow temporary associated with ORIG. That is, ORIG, being an
5250 original temporary, will have a shadow temporary associated with
5251 it. However, in the case envisaged here, there will so far have
5252 been no IR emitted to actually write a shadow value into that
5253 temporary. What this routine does is to (emit IR to) copy the
5254 value in SHADOW into said temporary, so that after this call,
5255 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5256 value in SHADOW.
5257
5258 Point is to allow callers to compute "by hand" a shadow value for
5259 ORIG, and force it to be associated with ORIG.
5260
5261 How do we know that that shadow associated with ORIG has not so far
5262 been assigned to? Well, we don't per se know that, but supposing
5263 it had. Then this routine would create a second assignment to it,
5264 and later the IR sanity checker would barf. But that never
5265 happens. QED.
5266*/
5267static void bind_shadow_tmp_to_orig ( UChar how,
5268 MCEnv* mce,
5269 IRAtom* orig, IRAtom* shadow )
5270{
5271 tl_assert(isOriginalAtom(mce, orig));
5272 tl_assert(isShadowAtom(mce, shadow));
5273 switch (orig->tag) {
5274 case Iex_Const:
5275 tl_assert(shadow->tag == Iex_Const);
5276 break;
5277 case Iex_RdTmp:
5278 tl_assert(shadow->tag == Iex_RdTmp);
5279 if (how == 'V') {
5280 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5281 shadow);
5282 } else {
5283 tl_assert(how == 'B');
5284 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5285 shadow);
5286 }
5287 break;
5288 default:
5289 tl_assert(0);
5290 }
5291}
5292
5293
/* Instrument an IRCAS statement.  Common documentation for both the
   single- and double-element cases lives here; the actual work is
   dispatched to do_shadow_CAS_single/do_shadow_CAS_double below. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressability of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
           old = * %reg
           new = old + 1
           atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
5426
5427
/* Instrument a single-element CAS.  Follows steps 1..7 of the scheme
   described in do_shadow_CAS above: fetch the V (and, with origin
   tracking, B) shadows of the proposed and expected values, check the
   address and load the old shadow value, emit the CAS itself, then
   guardedly (on "expected == old") update shadow memory.  NB: the
   order of the emitted IR statements is essential and must not be
   changed. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5516
5517
/* Instrument a double-element CAS.  Same steps 1..7 as the scheme in
   do_shadow_CAS above, but every step is done for both the Hi and Lo
   halves, with memOffsHi/memOffsLo selecting which half lives at the
   lower memory address according to the access's endianness.  NB: the
   order of the emitted IR statements is essential and must not be
   changed. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /* Compare both halves at once:
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5678
5679
sewardjdb5907d2009-11-26 17:20:21 +00005680/* ------ Dealing with LL/SC (not difficult) ------ */
5681
5682static void do_shadow_LLSC ( MCEnv* mce,
5683 IREndness stEnd,
5684 IRTemp stResult,
5685 IRExpr* stAddr,
5686 IRExpr* stStoredata )
5687{
5688 /* In short: treat a load-linked like a normal load followed by an
5689 assignment of the loaded (shadow) data to the result temporary.
5690 Treat a store-conditional like a normal store, and mark the
5691 result temporary as defined. */
5692 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5693 IRTemp resTmp = findShadowTmpV(mce, stResult);
5694
5695 tl_assert(isIRAtom(stAddr));
5696 if (stStoredata)
5697 tl_assert(isIRAtom(stStoredata));
5698
5699 if (stStoredata == NULL) {
5700 /* Load Linked */
5701 /* Just treat this as a normal load, followed by an assignment of
5702 the value to .result. */
5703 /* Stay sane */
5704 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5705 || resTy == Ity_I16 || resTy == Ity_I8);
5706 assign( 'V', mce, resTmp,
5707 expr2vbits_Load(
sewardjcafe5052013-01-17 14:24:35 +00005708 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
5709 NULL/*always happens*/) );
sewardjdb5907d2009-11-26 17:20:21 +00005710 } else {
5711 /* Store Conditional */
5712 /* Stay sane */
5713 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5714 stStoredata);
5715 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5716 || dataTy == Ity_I16 || dataTy == Ity_I8);
5717 do_shadow_Store( mce, stEnd,
5718 stAddr, 0/* addr bias */,
5719 stStoredata,
5720 NULL /* shadow data */,
5721 NULL/*guard*/ );
5722 /* This is a store conditional, so it writes to .result a value
5723 indicating whether or not the store succeeded. Just claim
5724 this value is always defined. In the PowerPC interpretation
5725 of store-conditional, definedness of the success indication
5726 depends on whether the address of the store matches the
5727 reservation address. But we can't tell that here (and
5728 anyway, we're not being PowerPC-specific). At least we are
5729 guaranteed that the definedness of the store address, and its
5730 addressibility, will be checked as per normal. So it seems
5731 pretty safe to just say that the success indication is always
5732 defined.
5733
5734 In schemeS, for origin tracking, we must correspondingly set
5735 a no-origin value for the origin shadow of .result.
5736 */
5737 tl_assert(resTy == Ity_I1);
5738 assign( 'V', mce, resTmp, definedOfType(resTy) );
5739 }
5740}
5741
5742
sewardjcafe5052013-01-17 14:24:35 +00005743/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
5744
5745static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
5746{
sewardjb9e6d242013-05-11 13:42:08 +00005747 complainIfUndefined(mce, sg->guard, NULL);
5748 /* do_shadow_Store will generate code to check the definedness and
5749 validity of sg->addr, in the case where sg->guard evaluates to
5750 True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005751 do_shadow_Store( mce, sg->end,
5752 sg->addr, 0/* addr bias */,
5753 sg->data,
5754 NULL /* shadow data */,
5755 sg->guard );
5756}
5757
5758static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
5759{
sewardjb9e6d242013-05-11 13:42:08 +00005760 complainIfUndefined(mce, lg->guard, NULL);
5761 /* expr2vbits_Load_guarded_General will generate code to check the
5762 definedness and validity of lg->addr, in the case where
5763 lg->guard evaluates to True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005764
5765 /* Look at the LoadG's built-in conversion operation, to determine
5766 the source (actual loaded data) type, and the equivalent IROp.
5767 NOTE that implicitly we are taking a widening operation to be
5768 applied to original atoms and producing one that applies to V
5769 bits. Since signed and unsigned widening are self-shadowing,
5770 this is a straight copy of the op (modulo swapping from the
5771 IRLoadGOp form to the IROp form). Note also therefore that this
5772 implicitly duplicates the logic to do with said widening ops in
5773 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
5774 IROp vwiden = Iop_INVALID;
5775 IRType loadedTy = Ity_INVALID;
5776 switch (lg->cvt) {
5777 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
5778 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
5779 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
5780 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
5781 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
5782 default: VG_(tool_panic)("do_shadow_LoadG");
5783 }
5784
5785 IRAtom* vbits_alt
5786 = expr2vbits( mce, lg->alt );
5787 IRAtom* vbits_final
5788 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
5789 lg->addr, 0/*addr bias*/,
5790 lg->guard, vwiden, vbits_alt );
5791 /* And finally, bind the V bits to the destination temporary. */
5792 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
5793}
5794
5795
sewardj95448072004-11-22 20:19:51 +00005796/*------------------------------------------------------------*/
5797/*--- Memcheck main ---*/
5798/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005799
sewardj7cf4e6b2008-05-01 20:24:26 +00005800static void schemeS ( MCEnv* mce, IRStmt* st );
5801
sewardj95448072004-11-22 20:19:51 +00005802static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005803{
sewardj95448072004-11-22 20:19:51 +00005804 ULong n = 0;
5805 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005806 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005807 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005808 return False;
5809 tl_assert(at->tag == Iex_Const);
5810 con = at->Iex.Const.con;
5811 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005812 case Ico_U1: return False;
5813 case Ico_U8: n = (ULong)con->Ico.U8; break;
5814 case Ico_U16: n = (ULong)con->Ico.U16; break;
5815 case Ico_U32: n = (ULong)con->Ico.U32; break;
5816 case Ico_U64: n = (ULong)con->Ico.U64; break;
5817 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005818 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005819 case Ico_F64i: return False;
5820 case Ico_V128: return False;
sewardj1eb272f2014-01-26 18:36:52 +00005821 case Ico_V256: return False;
sewardj95448072004-11-22 20:19:51 +00005822 default: ppIRExpr(at); tl_assert(0);
5823 }
5824 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005825 return (/*32*/ n == 0xFEFEFEFFULL
5826 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005827 /*32*/ || n == 0x7F7F7F7FULL
sewardja150fe92013-12-11 16:49:46 +00005828 /*32*/ || n == 0x7EFEFEFFULL
5829 /*32*/ || n == 0x81010100ULL
tomd9774d72005-06-27 08:11:01 +00005830 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005831 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005832 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005833 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005834 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005835 );
sewardj95448072004-11-22 20:19:51 +00005836}
njn25e49d8e72002-09-23 09:36:25 +00005837
sewardj95448072004-11-22 20:19:51 +00005838static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5839{
sewardjd5204dc2004-12-31 01:16:11 +00005840 Int i;
5841 IRExpr* e;
5842 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00005843 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00005844 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00005845 case Ist_WrTmp:
5846 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00005847 switch (e->tag) {
5848 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00005849 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00005850 return False;
sewardjd5204dc2004-12-31 01:16:11 +00005851 case Iex_Const:
5852 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00005853 case Iex_Unop:
sewardja150fe92013-12-11 16:49:46 +00005854 return isBogusAtom(e->Iex.Unop.arg)
5855 || e->Iex.Unop.op == Iop_GetMSBs8x16;
sewardjd5204dc2004-12-31 01:16:11 +00005856 case Iex_GetI:
5857 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00005858 case Iex_Binop:
5859 return isBogusAtom(e->Iex.Binop.arg1)
5860 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00005861 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00005862 return isBogusAtom(e->Iex.Triop.details->arg1)
5863 || isBogusAtom(e->Iex.Triop.details->arg2)
5864 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00005865 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00005866 return isBogusAtom(e->Iex.Qop.details->arg1)
5867 || isBogusAtom(e->Iex.Qop.details->arg2)
5868 || isBogusAtom(e->Iex.Qop.details->arg3)
5869 || isBogusAtom(e->Iex.Qop.details->arg4);
florian5686b2d2013-01-29 03:57:40 +00005870 case Iex_ITE:
5871 return isBogusAtom(e->Iex.ITE.cond)
5872 || isBogusAtom(e->Iex.ITE.iftrue)
5873 || isBogusAtom(e->Iex.ITE.iffalse);
sewardj2e595852005-06-30 23:33:37 +00005874 case Iex_Load:
5875 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00005876 case Iex_CCall:
5877 for (i = 0; e->Iex.CCall.args[i]; i++)
5878 if (isBogusAtom(e->Iex.CCall.args[i]))
5879 return True;
5880 return False;
5881 default:
5882 goto unhandled;
5883 }
sewardjd5204dc2004-12-31 01:16:11 +00005884 case Ist_Dirty:
5885 d = st->Ist.Dirty.details;
sewardj21a5f8c2013-08-08 10:41:46 +00005886 for (i = 0; d->args[i]; i++) {
5887 IRAtom* atom = d->args[i];
floriana5c3ecb2013-08-15 20:55:42 +00005888 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) {
sewardj21a5f8c2013-08-08 10:41:46 +00005889 if (isBogusAtom(atom))
5890 return True;
5891 }
5892 }
florian6c0aa2c2013-01-21 01:27:22 +00005893 if (isBogusAtom(d->guard))
sewardjd5204dc2004-12-31 01:16:11 +00005894 return True;
5895 if (d->mAddr && isBogusAtom(d->mAddr))
5896 return True;
5897 return False;
sewardj95448072004-11-22 20:19:51 +00005898 case Ist_Put:
5899 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00005900 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005901 return isBogusAtom(st->Ist.PutI.details->ix)
5902 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00005903 case Ist_Store:
5904 return isBogusAtom(st->Ist.Store.addr)
5905 || isBogusAtom(st->Ist.Store.data);
sewardjcafe5052013-01-17 14:24:35 +00005906 case Ist_StoreG: {
5907 IRStoreG* sg = st->Ist.StoreG.details;
5908 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
5909 || isBogusAtom(sg->guard);
5910 }
5911 case Ist_LoadG: {
5912 IRLoadG* lg = st->Ist.LoadG.details;
5913 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
5914 || isBogusAtom(lg->guard);
5915 }
sewardj95448072004-11-22 20:19:51 +00005916 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00005917 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00005918 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005919 return isBogusAtom(st->Ist.AbiHint.base)
5920 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00005921 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00005922 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00005923 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005924 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005925 case Ist_CAS:
5926 cas = st->Ist.CAS.details;
5927 return isBogusAtom(cas->addr)
5928 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5929 || isBogusAtom(cas->expdLo)
5930 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5931 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00005932 case Ist_LLSC:
5933 return isBogusAtom(st->Ist.LLSC.addr)
5934 || (st->Ist.LLSC.storedata
5935 ? isBogusAtom(st->Ist.LLSC.storedata)
5936 : False);
sewardj95448072004-11-22 20:19:51 +00005937 default:
5938 unhandled:
5939 ppIRStmt(st);
5940 VG_(tool_panic)("hasBogusLiterals");
5941 }
5942}
njn25e49d8e72002-09-23 09:36:25 +00005943
njn25e49d8e72002-09-23 09:36:25 +00005944
/* Top-level instrumentation entry point, invoked by the Valgrind core
   once per superblock (SB) of guest IR.  Returns a new SB containing
   the original statements interleaved with V-bit (definedness)
   shadow-state updates and, when MC_(clo_mc_level) == 3
   (--track-origins=yes), B-bit (origin) updates via schemeS.
   Parameters closure, vge and archinfo_host are not referenced in
   this function. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;   /* flip to 1||False for tracing */
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything (tyenv, next, jumpkind) except the
      statement list, which is rebuilt below. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb            = sb_out;
   mce.trace         = verboze;
   mce.layout        = layout;
   mce.hWordTy       = hWordTy;
   mce.bogusLiterals = False;

   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
      Darwin.  10.7 is mostly built with LLVM, which uses these for
      bitfield inserts, and we get a lot of false errors if the cheap
      interpretation is used, alas.  Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   /* One tmpMap entry per original temp, shadows created lazily. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      /* 'C' = copy of an original (client) statement. */
      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in sb_out,
         so it can be printed in the verbose trace below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_StoreG:
            do_shadow_StoreG( &mce, st->Ist.StoreG.details );
            break;

         case Ist_LoadG:
            do_shadow_LoadG( &mce, st->Ist.LoadG.details );
            break;

         case Ist_Exit:
            /* Branching on an undefined guard is the classic
               "conditional jump depends on uninitialised value". */
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            /* No data to shadow. */
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next, NULL );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00006252
sewardj81651dc2007-08-28 06:05:20 +00006253/*------------------------------------------------------------*/
6254/*--- Post-tree-build final tidying ---*/
6255/*------------------------------------------------------------*/
6256
6257/* This exploits the observation that Memcheck often produces
6258 repeated conditional calls of the form
6259
sewardj7cf4e6b2008-05-01 20:24:26 +00006260 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00006261
6262 with the same guard expression G guarding the same helper call.
6263 The second and subsequent calls are redundant. This usually
6264 results from instrumentation of guest code containing multiple
6265 memory references at different constant offsets from the same base
6266 register. After optimisation of the instrumentation, you get a
6267 test for the definedness of the base register for each memory
6268 reference, which is kinda pointless. MC_(final_tidy) therefore
6269 looks for such repeated calls and removes all but the first. */
6270
/* A struct for recording which (helper, guard) pairs we have already
   seen.  'entry' is the helper's code address (the IRCallee .addr of
   the value-check-fail helper); 'guard' is the guarding expression,
   compared structurally via sameIRValue. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
6276
6277/* Return True if e1 and e2 definitely denote the same value (used to
6278 compare guards). Return False if unknown; False is the safe
6279 answer. Since guest registers and guest memory do not have the
6280 SSA property we must return False if any Gets or Loads appear in
6281 the expression. */
6282
6283static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6284{
6285 if (e1->tag != e2->tag)
6286 return False;
6287 switch (e1->tag) {
6288 case Iex_Const:
6289 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6290 case Iex_Binop:
6291 return e1->Iex.Binop.op == e2->Iex.Binop.op
6292 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6293 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6294 case Iex_Unop:
6295 return e1->Iex.Unop.op == e2->Iex.Unop.op
6296 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6297 case Iex_RdTmp:
6298 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
florian5686b2d2013-01-29 03:57:40 +00006299 case Iex_ITE:
6300 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6301 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6302 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
sewardj81651dc2007-08-28 06:05:20 +00006303 case Iex_Qop:
6304 case Iex_Triop:
6305 case Iex_CCall:
6306 /* be lazy. Could define equality for these, but they never
6307 appear to be used. */
6308 return False;
6309 case Iex_Get:
6310 case Iex_GetI:
6311 case Iex_Load:
6312 /* be conservative - these may not give the same value each
6313 time */
6314 return False;
6315 case Iex_Binder:
6316 /* should never see this */
6317 /* fallthrough */
6318 default:
6319 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6320 ppIRExpr(e1);
6321 VG_(tool_panic)("memcheck:sameIRValue");
6322 return False;
6323 }
6324}
6325
6326/* See if 'pairs' already has an entry for (entry, guard). Return
6327 True if so. If not, add an entry. */
6328
6329static
6330Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6331{
6332 Pair p;
6333 Pair* pp;
6334 Int i, n = VG_(sizeXA)( pairs );
6335 for (i = 0; i < n; i++) {
6336 pp = VG_(indexXA)( pairs, i );
6337 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6338 return True;
6339 }
6340 p.guard = guard;
6341 p.entry = entry;
6342 VG_(addToXA)( pairs, &p );
6343 return False;
6344}
6345
florian11f3cc82012-10-21 02:19:35 +00006346static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00006347{
6348 return
sewardj7cf4e6b2008-05-01 20:24:26 +00006349 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6350 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6351 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6352 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6353 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6354 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6355 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6356 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00006357}
6358
/* Post-tree-build pass: delete redundant repeated guarded calls to
   the value-check-fail helpers (see the long comment above).  Works
   in place on sb_in, overwriting deleted calls with IRStmt_NoOp, and
   returns the same block. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt* st;
   IRDirty* di;
   IRExpr* guard;
   IRCallee* cee;
   Bool alreadyPresent;
   /* Set of (helper address, guard) pairs seen so far in this block. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      /* Only dirty helper calls are of interest. */
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      tl_assert(guard);
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* Replace, rather than remove, so statement indices stay
            valid during the scan. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
6398
6399
sewardj7cf4e6b2008-05-01 20:24:26 +00006400/*------------------------------------------------------------*/
6401/*--- Origin tracking stuff ---*/
6402/*------------------------------------------------------------*/
6403
/* Almost identical to findShadowTmpV.  Return the origin (B) shadow
   temp for original temp 'orig', lazily creating it (type Ity_I32,
   kind BSh) on first request and caching it in mce->tmpMap. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
6424
6425static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6426{
6427 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6428}
6429
sewardjcafe5052013-01-17 14:24:35 +00006430
6431/* Make a guarded origin load, with no special handling in the
6432 didn't-happen case. A GUARD of NULL is assumed to mean "always
6433 True".
6434
6435 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6436 return the otag. The loaded size is SZB. If GUARD evaluates to
6437 False at run time then the returned otag is zero.
6438*/
6439static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6440 IRAtom* baseaddr,
6441 Int offset, IRExpr* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00006442{
6443 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00006444 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00006445 IRTemp bTmp;
6446 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00006447 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00006448 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6449 IRAtom* ea = baseaddr;
6450 if (offset != 0) {
6451 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6452 : mkU64( (Long)(Int)offset );
6453 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6454 }
sewardj1c0ce7a2009-07-01 08:10:49 +00006455 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00006456
6457 switch (szB) {
6458 case 1: hFun = (void*)&MC_(helperc_b_load1);
6459 hName = "MC_(helperc_b_load1)";
6460 break;
6461 case 2: hFun = (void*)&MC_(helperc_b_load2);
6462 hName = "MC_(helperc_b_load2)";
6463 break;
6464 case 4: hFun = (void*)&MC_(helperc_b_load4);
6465 hName = "MC_(helperc_b_load4)";
6466 break;
6467 case 8: hFun = (void*)&MC_(helperc_b_load8);
6468 hName = "MC_(helperc_b_load8)";
6469 break;
6470 case 16: hFun = (void*)&MC_(helperc_b_load16);
6471 hName = "MC_(helperc_b_load16)";
6472 break;
sewardj45fa9f42012-05-21 10:18:10 +00006473 case 32: hFun = (void*)&MC_(helperc_b_load32);
6474 hName = "MC_(helperc_b_load32)";
6475 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00006476 default:
6477 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6478 tl_assert(0);
6479 }
6480 di = unsafeIRDirty_1_N(
6481 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6482 mkIRExprVec_1( ea )
6483 );
sewardjcafe5052013-01-17 14:24:35 +00006484 if (guard) {
6485 di->guard = guard;
6486 /* Ideally the didn't-happen return value here would be
6487 all-zeroes (unknown-origin), so it'd be harmless if it got
6488 used inadvertantly. We slum it out with the IR-mandated
6489 default value (0b01 repeating, 0x55 etc) as that'll probably
6490 trump all legitimate otags via Max32, and it's pretty
6491 obviously bogus. */
6492 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006493 /* no need to mess with any annotations. This call accesses
6494 neither guest state nor guest memory. */
6495 stmt( 'B', mce, IRStmt_Dirty(di) );
6496 if (mce->hWordTy == Ity_I64) {
6497 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00006498 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00006499 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6500 return mkexpr(bTmp32);
6501 } else {
6502 /* 32-bit host */
6503 return mkexpr(bTmp);
6504 }
6505}
sewardj1c0ce7a2009-07-01 08:10:49 +00006506
sewardjcafe5052013-01-17 14:24:35 +00006507
6508/* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6509 loaded size is SZB. The load is regarded as unconditional (always
6510 happens).
6511*/
6512static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6513 Int offset )
florian434ffae2012-07-19 17:23:42 +00006514{
sewardjcafe5052013-01-17 14:24:35 +00006515 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
florian434ffae2012-07-19 17:23:42 +00006516}
6517
sewardjcafe5052013-01-17 14:24:35 +00006518
6519/* The most general handler for guarded origin loads. A GUARD of NULL
6520 is assumed to mean "always True".
6521
6522 Generate IR to do a shadow origin load from ADDR+BIAS and return
6523 the B bits. The loaded type is TY. If GUARD evaluates to False at
6524 run time then the returned B bits are simply BALT instead.
6525*/
6526static
6527IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6528 IRType ty,
6529 IRAtom* addr, UInt bias,
6530 IRAtom* guard, IRAtom* balt )
6531{
6532 /* If the guard evaluates to True, this will hold the loaded
6533 origin. If the guard evaluates to False, this will be zero,
6534 meaning "unknown origin", in which case we will have to replace
florian5686b2d2013-01-29 03:57:40 +00006535 it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00006536 IRAtom* iftrue
6537 = assignNew('B', mce, Ity_I32,
6538 gen_guarded_load_b(mce, sizeofIRType(ty),
6539 addr, bias, guard));
6540 /* These are the bits we will return if the load doesn't take
6541 place. */
6542 IRAtom* iffalse
6543 = balt;
florian5686b2d2013-01-29 03:57:40 +00006544 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00006545 something that iropt knows how to fold out later. */
6546 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00006547 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00006548 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00006549 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00006550}
6551
6552
/* Generate a shadow origins store.  guard :: Ity_I1 controls whether
   the store really happens; NULL means it unconditionally does.

   SZB is the size in bytes of the original store and selects which
   MC_(helperc_b_storeN) helper is called.  DATAB is the 32-bit
   B-value to store; on 64-bit hosts it is zero-widened to I64 before
   being passed to the helper.  OFFSET, if nonzero, is folded into the
   effective address with a host-word-sized add. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   const HChar* hName;
   IRDirty* di;
   /* Address type decides whether we add with Add32 or Add64. */
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      /* The guard must be an original (non-shadow) 1-bit atom. */
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      /* Fold the byte offset into the effective address. */
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* Helpers take a host-word-sized B-value; widen on 64-bit hosts. */
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   /* Select the store helper matching the original store's size. */
   switch (szB) {
      case 1:  hFun  = (void*)&MC_(helperc_b_store1);
               hName = "MC_(helperc_b_store1)";
               break;
      case 2:  hFun  = (void*)&MC_(helperc_b_store2);
               hName = "MC_(helperc_b_store2)";
               break;
      case 4:  hFun  = (void*)&MC_(helperc_b_store4);
               hName = "MC_(helperc_b_store4)";
               break;
      case 8:  hFun  = (void*)&MC_(helperc_b_store8);
               hName = "MC_(helperc_b_store8)";
               break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_store32);
               hName = "MC_(helperc_b_store32)";
               break;
      default:
         /* No helper exists for other sizes. */
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hFun ),
                           mkIRExprVec_2( ea, dataB )
                         );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
6608
6609static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006610 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006611 if (eTy == Ity_I64)
6612 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6613 if (eTy == Ity_I32)
6614 return e;
6615 tl_assert(0);
6616}
6617
6618static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006619 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006620 tl_assert(eTy == Ity_I32);
6621 if (dstTy == Ity_I64)
6622 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6623 tl_assert(0);
6624}
6625
sewardjdb5907d2009-11-26 17:20:21 +00006626
/* Compute and return the origin ("B") value for expression E, as an
   Ity_I32 atom, emitting any IR needed along the way.  Origins are
   merged pessimistically with gen_maxU32; a constant zero means
   "unknown/defined origin".  Only used when origin tracking is on
   (MC_(clo_mc_level) == 3). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom     *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow of the register array lives at base +
            2 * total_sizeB in the shadow guest state. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         /* Merge the origins of all non-excluded call arguments. */
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         /* Unconditional load: fetch origin bits from shadow memory. */
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         /* Merge the origins of the condition and both arms. */
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* A unary op's origin is just its argument's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants have no interesting origin. */
         return mkU32(0);
      case Iex_RdTmp:
         /* Read the temp's B-shadow. */
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* b_offset is -1 if this guest-state slice is untracked. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
6761
sewardjdb5907d2009-11-26 17:20:21 +00006762
/* Origin-tracking instrumentation for a dirty helper call.  Folds the
   origins of the guard, arguments, read guest state and read memory
   into a single 32-bit B-value 'curr', then distributes 'curr' to the
   destination temp, written guest state and written memory. */
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                 + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpB(mce, d->tmp);
      /* NOTE(review): the 'V' category char here looks inconsistent
         with the 'B' used throughout this origin-tracking code --
         confirm whether it is intentional (the category appears to
         affect only temp classification/trace output, not emitted
         IR). */
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                              2*mce->layout->total_sizeB,
                                              Ity_I32));
               /* NOTE(review): 'V' category here too -- see the note
                  at the destination-temp assignment above. */
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                          + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
6966
sewardjdb5907d2009-11-26 17:20:21 +00006967
sewardjcafe5052013-01-17 14:24:35 +00006968/* Generate IR for origin shadowing for a general guarded store. */
6969static void do_origins_Store_guarded ( MCEnv* mce,
6970 IREndness stEnd,
6971 IRExpr* stAddr,
6972 IRExpr* stData,
6973 IRExpr* guard )
sewardjdb5907d2009-11-26 17:20:21 +00006974{
6975 Int dszB;
6976 IRAtom* dataB;
6977 /* assert that the B value for the address is already available
6978 (somewhere), since the call to schemeE will want to see it.
6979 XXXX how does this actually ensure that?? */
6980 tl_assert(isIRAtom(stAddr));
6981 tl_assert(isIRAtom(stData));
6982 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6983 dataB = schemeE( mce, stData );
sewardjcafe5052013-01-17 14:24:35 +00006984 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
6985}
6986
6987
6988/* Generate IR for origin shadowing for a plain store. */
6989static void do_origins_Store_plain ( MCEnv* mce,
6990 IREndness stEnd,
6991 IRExpr* stAddr,
6992 IRExpr* stData )
6993{
6994 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
6995 NULL/*guard*/ );
6996}
6997
6998
6999/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7000
7001static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7002{
7003 do_origins_Store_guarded( mce, sg->end, sg->addr,
7004 sg->data, sg->guard );
7005}
7006
7007static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7008{
7009 IRType loadedTy = Ity_INVALID;
7010 switch (lg->cvt) {
7011 case ILGop_Ident32: loadedTy = Ity_I32; break;
7012 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7013 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7014 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7015 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7016 default: VG_(tool_panic)("schemeS.IRLoadG");
7017 }
7018 IRAtom* ori_alt
7019 = schemeE( mce,lg->alt );
7020 IRAtom* ori_final
7021 = expr2ori_Load_guarded_General(mce, loadedTy,
7022 lg->addr, 0/*addr bias*/,
7023 lg->guard, ori_alt );
7024 /* And finally, bind the origin to the destination temporary. */
7025 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
sewardjdb5907d2009-11-26 17:20:21 +00007026}
7027
7028
/* Generate origin-tracking ("B") instrumentation for a single IR
   statement ST.  Only called when origin tracking is enabled
   (MC_(clo_mc_level) == 3).  Dispatches to the do_origins_* helpers
   for the compound statement forms. */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom     *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow of the array lives at base + 2*total_sizeB in
            the shadow guest state. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         /* Helper calls get the full input/output treatment. */
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         /* Plain (unconditional) store. */
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                      st->Ist.Store.addr,
                                      st->Ist.Store.data );
         break;

      case Ist_StoreG:
         /* Guarded store. */
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         /* Guarded load. */
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* b_offset is -1 if this guest-state slice is untracked. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         /* Mirror the assignment into the temp's B-shadow. */
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No origin state is affected by these. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
7150
7151
njn25e49d8e72002-09-23 09:36:25 +00007152/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00007153/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00007154/*--------------------------------------------------------------------*/