blob: c433b43b6af463725f8cc7906560244766c2bc97 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj0f157dd2013-10-18 14:27:36 +000011 Copyright (C) 2000-2013 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
106 historical reasons.) [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardjb9e6d242013-05-11 13:42:08 +0000122/* Generation of addr-definedness, addr-validity and
123 guard-definedness checks pertaining to loads and stores (Iex_Load,
124 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
125 loads/stores) was re-checked 11 May 2013. */
126
sewardj95448072004-11-22 20:19:51 +0000127/*------------------------------------------------------------*/
128/*--- Forward decls ---*/
129/*------------------------------------------------------------*/
130
struct _MCEnv;   /* instrumentation state; defined below */

/* Map an IR type to the integer type used for its V-bit shadow. */
static IRType  shadowTypeV ( IRType ty );
/* Compute an expression giving the V-bits (definedness) of 'e'. */
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
/* Find (or lazily create) the B- (origin-tracking) shadow of 'orig'. */
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

/* An all-zeroes ("defined") I128 constant. */
static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000138
139/*------------------------------------------------------------*/
140/*--- Memcheck running state, and tmp management. ---*/
141/*------------------------------------------------------------*/
142
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;     /* role of this tmp: original, V-shadow or B-shadow */
      IRTemp   shadowV;  /* definedness shadow, or IRTemp_INVALID */
      IRTemp   shadowB;  /* origin-tracking shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
169
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;   /* print each statement as it is added? */

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
214
215/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
216 demand), as they are encountered. This is for two reasons.
217
218 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
   tracking them.
221
222 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
223 table indexed [0 .. n_types-1], which gives the current shadow for
224 each original tmp, or INVALID_IRTEMP if none is so far assigned.
225 It is necessary to support making multiple assignments to a shadow
226 -- specifically, after testing a shadow for definedness, it needs
227 to be made defined. But IR's SSA property disallows this.
228
229 (2) (more important reason): Therefore, when a shadow needs to get
230 a new value, a new temporary is created, the value is assigned to
231 that, and the tmpMap is updated to reflect the new binding.
232
233 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000234 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000235 there's a read-before-write error in the original tmps. The IR
236 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000237*/
sewardj95448072004-11-22 20:19:51 +0000238
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   /* tmpMap must stay in lockstep with sb->tyenv: the new entry's
      index in tmpMap must equal the new temp's number. */
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
255
256
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid.  Re-fetch the entry pointer. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
278
sewardj95448072004-11-22 20:19:51 +0000279/* Allocate a new shadow for the given original tmp. This means any
280 previous shadow is abandoned. This is needed because it is
281 necessary to give a new value to a shadow once it has been tested
282 for undefinedness, but unfortunately IR's SSA property disallows
283 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000284 and use that instead.
285
286 This is the same as findShadowTmpV, except we don't bother to see
287 if a shadow temp already existed -- we simply allocate a new one
288 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000289static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000290{
sewardj1c0ce7a2009-07-01 08:10:49 +0000291 TempMapEnt* ent;
292 /* VG_(indexXA) range-checks 'orig', hence no need to check
293 here. */
294 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
295 tl_assert(ent->kind == Orig);
296 if (1) {
297 IRTemp tmpV
298 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
299 /* newTemp may cause mce->tmpMap to resize, hence previous results
300 from VG_(indexXA) are invalid. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 ent->shadowV = tmpV;
304 }
sewardj95448072004-11-22 20:19:51 +0000305}
306
307
308/*------------------------------------------------------------*/
309/*--- IRAtoms -- a subset of IRExprs ---*/
310/*------------------------------------------------------------*/
311
/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;
320
321/* (used for sanity checks only): is this an atom which looks
322 like it's from original code? */
323static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
324{
325 if (a1->tag == Iex_Const)
326 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000327 if (a1->tag == Iex_RdTmp) {
328 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
329 return ent->kind == Orig;
330 }
sewardj95448072004-11-22 20:19:51 +0000331 return False;
332}
333
334/* (used for sanity checks only): is this an atom which looks
335 like it's from shadow code? */
336static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
337{
338 if (a1->tag == Iex_Const)
339 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000340 if (a1->tag == Iex_RdTmp) {
341 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
342 return ent->kind == VSh || ent->kind == BSh;
343 }
sewardj95448072004-11-22 20:19:51 +0000344 return False;
345}
346
347/* (used for sanity checks only): check that both args are atoms and
348 are identically-kinded. */
349static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
350{
sewardj0b9d74a2006-12-24 02:24:11 +0000351 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000352 return True;
sewardjbef552a2005-08-30 12:54:36 +0000353 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000354 return True;
355 return False;
356}
357
358
359/*------------------------------------------------------------*/
360/*--- Type management ---*/
361/*------------------------------------------------------------*/
362
/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      /* Integer types shadow themselves. */
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      /* Floating and decimal types are shadowed by the same-sized
         integer type. */
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      /* SIMD vector types shadow themselves. */
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}
389
/* Produce a 'defined' (all-zeroes) value of the given shadow type.
   Should only be supplied shadow types
   (I1/I8/I16/I32/I64/I128/V128/V256). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}
405
406
sewardj95448072004-11-22 20:19:51 +0000407/*------------------------------------------------------------*/
408/*--- Constructing IR fragments ---*/
409/*------------------------------------------------------------*/
410
/* Add stmt 'st' to the superblock under construction, optionally
   tracing it.  'cat' tags the statement's origin ('V' = V-bit shadow
   code, 'B' = B-bit shadow code, 'C' = copied/original code) and is
   used only for the trace output. */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
420
/* Assign 'expr' to 'tmp' by emitting a WrTmp statement (tagged with
   'cat' for tracing; see stmt()). */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000426
/* Shorthand constructors for the various kinds of IR expressions.
   These merely wrap the corresponding IRExpr_* constructors; each
   argument is expanded exactly once. */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000439
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   /* The category letter determines what kind of temp to allocate. */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
467
468
469/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000470/*--- Helper functions for 128-bit ops ---*/
471/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000472
sewardjb5b87402011-03-07 16:05:35 +0000473static IRExpr *i128_const_zero(void)
474{
sewardj45fa9f42012-05-21 10:18:10 +0000475 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
476 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000477}
478
sewardj45fa9f42012-05-21 10:18:10 +0000479/* There are no I128-bit loads and/or stores [as generated by any
480 current front ends]. So we do not need to worry about that in
481 expr2vbits_Load */
482
sewardjb5b87402011-03-07 16:05:35 +0000483
484/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000485/*--- Constructing definedness primitive ops ---*/
486/*------------------------------------------------------------*/
487
/* --------- Defined-if-either-defined --------- */

/* DifD(x,y) = x AND y.  In the V-bit encoding 0 means "defined" and
   1 means "undefined" (see the mkImproveAND comments below), so the
   AND produces a result that is defined wherever either operand is
   defined.  One helper per operand size. */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}
525
/* --------- Undefined-if-either-undefined --------- */

/* UifU(x,y) = x OR y: the result is undefined (1) wherever either
   operand is undefined.  This is the worst-case rule for combining
   the definedness of two operands.  One helper per operand size,
   plus a type-dispatched wrapper at the end. */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   /* OR the two 64-bit halves of each operand separately, then
      reassemble the I128 result. */
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

/* Type-dispatched UifU: 'vty' is the (shadow) type of the operands. */
static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      case Ity_V256: return mkUifUV256(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
592
/* --------- The Left-family of operations. --------- */

/* Per-size wrappers applying Iop_LeftN to a shadow atom.  (The exact
   semantics of the Iop_Left* operations are defined in VEX's
   libvex_ir.h -- consult that for what "Left" computes.) */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
614
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).  Rationale: in an AND, a
   defined 0 operand bit forces the result bit to a defined 0, no
   matter what the other operand's V-bit says.  One helper per size.
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}
667
sewardj95448072004-11-22 20:19:51 +0000668/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
669 defined (0); all other -> undefined (1).
670*/
671static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
672{
673 tl_assert(isOriginalAtom(mce, data));
674 tl_assert(isShadowAtom(mce, vbits));
675 tl_assert(sameKindedAtoms(data, vbits));
676 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000677 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000678 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000679 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000680 vbits) );
681}
njn25e49d8e72002-09-23 09:36:25 +0000682
sewardj95448072004-11-22 20:19:51 +0000683static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
684{
685 tl_assert(isOriginalAtom(mce, data));
686 tl_assert(isShadowAtom(mce, vbits));
687 tl_assert(sameKindedAtoms(data, vbits));
688 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000689 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000690 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000691 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000692 vbits) );
693}
njn25e49d8e72002-09-23 09:36:25 +0000694
sewardj95448072004-11-22 20:19:51 +0000695static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
696{
697 tl_assert(isOriginalAtom(mce, data));
698 tl_assert(isShadowAtom(mce, vbits));
699 tl_assert(sameKindedAtoms(data, vbits));
700 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000701 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000702 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000703 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000704 vbits) );
705}
706
sewardj7010f6e2004-12-10 13:35:22 +0000707static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
708{
709 tl_assert(isOriginalAtom(mce, data));
710 tl_assert(isShadowAtom(mce, vbits));
711 tl_assert(sameKindedAtoms(data, vbits));
712 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000713 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000714 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000715 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000716 vbits) );
717}
718
sewardj20d38f22005-02-07 23:50:18 +0000719static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000720{
721 tl_assert(isOriginalAtom(mce, data));
722 tl_assert(isShadowAtom(mce, vbits));
723 tl_assert(sameKindedAtoms(data, vbits));
724 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000725 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000726 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000727 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000728 vbits) );
729}
730
sewardj350e8f72012-06-25 07:52:15 +0000731static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
732{
733 tl_assert(isOriginalAtom(mce, data));
734 tl_assert(isShadowAtom(mce, vbits));
735 tl_assert(sameKindedAtoms(data, vbits));
736 return assignNew(
737 'V', mce, Ity_V256,
738 binop(Iop_OrV256,
739 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
740 vbits) );
741}
742
sewardj95448072004-11-22 20:19:51 +0000743/* --------- Pessimising casts. --------- */
744
sewardjb5b87402011-03-07 16:05:35 +0000745/* The function returns an expression of type DST_TY. If any of the VBITS
746 is undefined (value == 1) the resulting expression has all bits set to
747 1. Otherwise, all bits are 0. */
748
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: same-width 32/64 pessimising casts
      can be done in a single CmpwNEZ op. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it.  The PCast yields all-0s or
         all-1s, so duplicating the 32-bit result into both halves
         gives the correct 64-bit result. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         /* Already a single bit; nothing to collapse. */
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type: sign-extend the single bit so the
      result is all-0s (defined) or all-1s (undefined). */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to 64 bits, then glue two copies together. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         /* Widen to 64, double up to V128, double up again to V256. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
859
sewardjd5204dc2004-12-31 01:16:11 +0000860/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
861/*
862 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
863 PCasting to Ity_U1. However, sometimes it is necessary to be more
864 accurate. The insight is that the result is defined if two
865 corresponding bits can be found, one from each argument, so that
866 both bits are defined but are different -- that makes EQ say "No"
867 and NE say "Yes". Hence, we compute an improvement term and DifD
868 it onto the "normal" (UifU) result.
869
870 The result is:
871
872 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000873 -- naive version
874 PCastTo<sz>( UifU<sz>(vxx, vyy) )
875
sewardjd5204dc2004-12-31 01:16:11 +0000876 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000877
878 -- improvement term
879 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000880 )
sewardje6f8af42005-07-06 18:48:59 +0000881
sewardjd5204dc2004-12-31 01:16:11 +0000882 where
883 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000884 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000885
sewardje6f8af42005-07-06 18:48:59 +0000886 vec = Or<sz>( vxx, // 0 iff bit defined
887 vyy, // 0 iff bit defined
888 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
889 )
890
891 If any bit of vec is 0, the result is defined and so the
892 improvement term should produce 0...0, else it should produce
893 1...1.
894
895 Hence require for the improvement term:
896
897 if vec == 1...1 then 1...1 else 0...0
898 ->
899 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
900
901 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000902*/
/* Build the accurate definedness interpretation of CmpEQ/CmpNE
   described in the comment above: the naive UifU result, ANDed
   (DifD) with an improvement term that forces "defined" whenever a
   pair of defined-but-different bits exists.  Returns an Ity_I1
   shadow atom. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific ops.  DifD is And (0 wins), UifU is
      Or (1 wins); 'top' is the all-ones constant at this width. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast<sz>( UifU<sz>(vxx, vyy) ) -- the standard scheme. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit wherever the corresponding arg bits are both
      defined AND different; 1 elsewhere (see comment above). */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = 1...1 iff vec == 1...1 (no deciding bit
      found), else 0...0 (result is definitely defined). */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Narrow to the Ity_I1 shadow the comparison result needs. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
977
sewardj95448072004-11-22 20:19:51 +0000978
sewardj992dff92005-10-07 11:08:55 +0000979/* --------- Semi-accurate interpretation of CmpORD. --------- */
980
981/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
982
983 CmpORD32S(x,y) = 1<<3 if x <s y
984 = 1<<2 if x >s y
985 = 1<<1 if x == y
986
987 and similarly the unsigned variant. The default interpretation is:
988
989 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000990 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000991
992 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
993 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000994
995 Also deal with a special case better:
996
997 CmpORD32S(x,0)
998
999 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1000 will be defined even if the rest of x isn't. In which case we do:
1001
1002 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +00001003 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1004 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +00001005
sewardj1bc82102005-12-23 00:16:24 +00001006 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +00001007*/
sewardja9e62a92005-10-07 12:13:21 +00001008static Bool isZeroU32 ( IRAtom* e )
1009{
1010 return
1011 toBool( e->tag == Iex_Const
1012 && e->Iex.Const.con->tag == Ico_U32
1013 && e->Iex.Const.con->Ico.U32 == 0 );
1014}
1015
sewardj1bc82102005-12-23 00:16:24 +00001016static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +00001017{
sewardj1bc82102005-12-23 00:16:24 +00001018 return
1019 toBool( e->tag == Iex_Const
1020 && e->Iex.Const.con->tag == Ico_U64
1021 && e->Iex.Const.con->Ico.U64 == 0 );
1022}
1023
/* Shadow semantics for the PowerPC-style 3-way comparisons
   CmpORD{32,64}{S,U}, per the scheme documented above: the default
   is PCast(x# UifU y#) masked to result bits 3..1; the signed
   compare-against-literal-zero case gets the more accurate
   treatment in which the LT bit is just the definedness of x's
   sign bit. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      /* 3<<1 masks the GT and EQ result bits (bits 2 and 1). */
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            /* Standard interpretation for GT# and EQ# ... */
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            /* ... ORed with LT# = x#[width-1], i.e. the definedness
               of x's sign bit, placed in result bit 3. */
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* 7<<1 masks the LT, GT and EQ result bits (bits 3..1); all
         other result bits are always zero, hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1094
1095
sewardj95448072004-11-22 20:19:51 +00001096/*------------------------------------------------------------*/
1097/*--- Emit a test and complaint if something is undefined. ---*/
1098/*------------------------------------------------------------*/
1099
sewardj7cf4e6b2008-05-01 20:24:26 +00001100static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1101
1102
sewardj95448072004-11-22 20:19:51 +00001103/* Set the annotations on a dirty helper to indicate that the stack
1104 pointer and instruction pointers might be read. This is the
1105 behaviour of all 'emit-a-complaint' style functions we might
1106 call. */
1107
1108static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1109 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001110 di->fxState[0].fx = Ifx_Read;
1111 di->fxState[0].offset = mce->layout->offset_SP;
1112 di->fxState[0].size = mce->layout->sizeof_SP;
1113 di->fxState[0].nRepeats = 0;
1114 di->fxState[0].repeatLen = 0;
1115 di->fxState[1].fx = Ifx_Read;
1116 di->fxState[1].offset = mce->layout->offset_IP;
1117 di->fxState[1].size = mce->layout->sizeof_IP;
1118 di->fxState[1].nRepeats = 0;
1119 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001120}
1121
1122
sewardjcafe5052013-01-17 14:24:35 +00001123/* Check the supplied *original* |atom| for undefinedness, and emit a
sewardj95448072004-11-22 20:19:51 +00001124 complaint if so. Once that happens, mark it as defined. This is
1125 possible because the atom is either a tmp or literal. If it's a
1126 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1127 be defined. In fact as mentioned above, we will have to allocate a
1128 new tmp to carry the new 'defined' shadow value, and update the
1129 original->tmp mapping accordingly; we cannot simply assign a new
sewardjcafe5052013-01-17 14:24:35 +00001130 value to an existing shadow tmp as this breaks SSAness.
1131
sewardjb9e6d242013-05-11 13:42:08 +00001132 The checks are performed, any resulting complaint emitted, and
1133 |atom|'s shadow temp set to 'defined', ONLY in the case that
1134 |guard| evaluates to True at run-time. If it evaluates to False
1135 then no action is performed. If |guard| is NULL (the usual case)
1136 then it is assumed to be always-true, and hence these actions are
1137 performed unconditionally.
1138
1139 This routine does not generate code to check the definedness of
1140 |guard|. The caller is assumed to have taken care of that already.
sewardj95448072004-11-22 20:19:51 +00001141*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      /* Origins are 32-bit; widen to the host word size if needed so
         it can be passed as a helper argument. */
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by value size; each size has a
      with-origin and a no-origin variant.  Sizes 2 and 16 share the
      generic 'checkN' helper, which takes the size as an argument. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   /* An origin must be present exactly when origin tracking is on. */
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1318
1319
1320/*------------------------------------------------------------*/
1321/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1322/*------------------------------------------------------------*/
1323
1324/* Examine the always-defined sections declared in layout to see if
1325 the (offset,size) section is within one. Note, is is an error to
1326 partially fall into such a region: (offset,size) should either be
1327 completely in such a region or completely not-in such a region.
1328*/
1329static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1330{
1331 Int minoffD, maxoffD, i;
1332 Int minoff = offset;
1333 Int maxoff = minoff + size - 1;
1334 tl_assert((minoff & ~0xFFFF) == 0);
1335 tl_assert((maxoff & ~0xFFFF) == 0);
1336
1337 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1338 minoffD = mce->layout->alwaysDefd[i].offset;
1339 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1340 tl_assert((minoffD & ~0xFFFF) == 0);
1341 tl_assert((maxoffD & ~0xFFFF) == 0);
1342
1343 if (maxoff < minoffD || maxoffD < minoff)
1344 continue; /* no overlap */
1345 if (minoff >= minoffD && maxoff <= maxoffD)
1346 return True; /* completely contained in an always-defd section */
1347
1348 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1349 }
1350 return False; /* could not find any containing section */
1351}
1352
1353
1354/* Generate into bb suitable actions to shadow this Put. If the state
1355 slice is marked 'always defined', do nothing. Otherwise, write the
1356 supplied V bits to the shadow state. We can pass in either an
1357 original atom or a V-atom, but not both. In the former case the
1358 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001359 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001360*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of |atom| (original) and |vatom| (shadow) must be
      supplied; derive the V-bits from the original if needed. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put.  The shadow state lives at
         offset + total_sizeB relative to the original state. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1404
1405
/* Generate into the output superblock suitable actions to shadow this
   PutI.  If the referenced state slice is marked 'always defined', do
   nothing; otherwise emit a PutI of the data's V bits into the
   corresponding slice of the shadow state.  (Unlike shadow_GETI below,
   this emits statements rather than returning an expression.)
*/
1409static
floriand39b0222012-05-31 15:48:13 +00001410void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001411{
sewardj7cf97ee2004-11-28 14:25:01 +00001412 IRAtom* vatom;
1413 IRType ty, tyS;
1414 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001415 IRRegArray* descr = puti->descr;
1416 IRAtom* ix = puti->ix;
1417 Int bias = puti->bias;
1418 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001419
njn1d0825f2006-03-27 11:37:07 +00001420 // Don't do shadow PUTIs if we're not doing undefined value checking.
1421 // Their absence lets Vex's optimiser remove all the shadow computation
1422 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001423 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001424 return;
1425
sewardj95448072004-11-22 20:19:51 +00001426 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001427 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001428 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001429 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001430 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001431 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001432 tl_assert(ty != Ity_I1);
1433 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001434 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001435 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1436 /* later: no ... */
1437 /* emit code to emit a complaint if any of the vbits are 1. */
1438 /* complainIfUndefined(mce, atom); */
1439 } else {
1440 /* Do a cloned version of the Put that refers to the shadow
1441 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001442 IRRegArray* new_descr
1443 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1444 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001445 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001446 }
1447}
1448
1449
1450/* Return an expression which contains the V bits corresponding to the
1451 given GET (passed in in pieces).
1452*/
1453static
1454IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1455{
sewardj7cf4e6b2008-05-01 20:24:26 +00001456 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001457 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001458 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001459 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1460 /* Always defined, return all zeroes of the relevant type */
1461 return definedOfType(tyS);
1462 } else {
1463 /* return a cloned version of the Get that refers to the shadow
1464 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001465 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001466 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1467 }
1468}
1469
1470
1471/* Return an expression which contains the V bits corresponding to the
1472 given GETI (passed in in pieces).
1473*/
1474static
sewardj0b9d74a2006-12-24 02:24:11 +00001475IRExpr* shadow_GETI ( MCEnv* mce,
1476 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001477{
1478 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001479 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001480 Int arrSize = descr->nElems * sizeofIRType(ty);
1481 tl_assert(ty != Ity_I1);
1482 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001483 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001484 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1485 /* Always defined, return all zeroes of the relevant type */
1486 return definedOfType(tyS);
1487 } else {
1488 /* return a cloned version of the Get that refers to the shadow
1489 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001490 IRRegArray* new_descr
1491 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1492 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001493 return IRExpr_GetI( new_descr, ix, bias );
1494 }
1495}
1496
1497
1498/*------------------------------------------------------------*/
1499/*--- Generating approximations for unknown operations, ---*/
1500/*--- using lazy-propagate semantics ---*/
1501/*------------------------------------------------------------*/
1502
1503/* Lazy propagation of undefinedness from two values, resulting in the
1504 specified shadow type.
1505*/
1506static
1507IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1508{
sewardj95448072004-11-22 20:19:51 +00001509 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001510 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1511 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001512 tl_assert(isShadowAtom(mce,va1));
1513 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001514
1515 /* The general case is inefficient because PCast is an expensive
1516 operation. Here are some special cases which use PCast only
1517 once rather than twice. */
1518
1519 /* I64 x I64 -> I64 */
1520 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1521 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1522 at = mkUifU(mce, Ity_I64, va1, va2);
1523 at = mkPCastTo(mce, Ity_I64, at);
1524 return at;
1525 }
1526
1527 /* I64 x I64 -> I32 */
1528 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1529 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1530 at = mkUifU(mce, Ity_I64, va1, va2);
1531 at = mkPCastTo(mce, Ity_I32, at);
1532 return at;
1533 }
1534
1535 if (0) {
1536 VG_(printf)("mkLazy2 ");
1537 ppIRType(t1);
1538 VG_(printf)("_");
1539 ppIRType(t2);
1540 VG_(printf)("_");
1541 ppIRType(finalVty);
1542 VG_(printf)("\n");
1543 }
1544
1545 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001546 at = mkPCastTo(mce, Ity_I32, va1);
1547 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1548 at = mkPCastTo(mce, finalVty, at);
1549 return at;
1550}
1551
1552
sewardjed69fdb2006-02-03 16:12:27 +00001553/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   /* No special case matched: print the offending type signature and
      assert.  The general fallback below is deliberately disabled. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1685
1686
sewardje91cea72006-02-08 19:32:02 +00001687/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast fewer
      times than the general scheme would. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* No special case matched: print the offending type signature and
      assert.  There is no general fallback; add a case above if a new
      signature turns up. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1753
1754
sewardj95448072004-11-22 20:19:51 +00001755/* Do the lazy propagation game from a null-terminated vector of
1756 atoms. This is presumably the arguments to a helper call, so the
1757 IRCallee info is also supplied in order that we can know which
1758 arguments should be ignored (via the .mcx_mask field).
1759*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);   /* mcx_mask is tested with (1<<i) below */
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;   /* excluded args don't influence the merge type */
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   /* Start the accumulator at 'all defined'. */
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   /* Finally, spread the accumulated (un)definedness over a value of
      the requested shadow type. */
   return mkPCastTo(mce, finalVtype, curr );
}
1805
1806
1807/*------------------------------------------------------------*/
1808/*--- Generating expensive sequences for exact carry-chain ---*/
1809/*--- propagation in add/sub and related operations. ---*/
1810/*------------------------------------------------------------*/
1811
/* Exact (carry-chain aware) definedness for aa+bb or aa-bb.  From the
   operands and their V bits we form the smallest (x_min: undefined
   bits cleared) and largest (x_max: undefined bits set) values each
   operand could take -- V bit 1 means 'undefined', cf. definedOfType
   returning zeroes for 'all defined'.  A result bit is undefined if
   either input bit is, or if the min/max results differ at that
   position (a carry/borrow could reach it). */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the width-appropriate operations. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (for subtraction, the extreme results pair each operand's min
      // with the other's max)
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1897
1898
sewardj4cfa81b2012-11-08 10:58:16 +00001899static
1900IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1901 IRAtom* atom, IRAtom* vatom )
1902{
1903 IRType ty;
1904 IROp xorOp, subOp, andOp;
1905 IRExpr *one;
1906 IRAtom *improver, *improved;
1907 tl_assert(isShadowAtom(mce,vatom));
1908 tl_assert(isOriginalAtom(mce,atom));
1909 tl_assert(sameKindedAtoms(atom,vatom));
1910
1911 switch (czop) {
1912 case Iop_Ctz32:
1913 ty = Ity_I32;
1914 xorOp = Iop_Xor32;
1915 subOp = Iop_Sub32;
1916 andOp = Iop_And32;
1917 one = mkU32(1);
1918 break;
1919 case Iop_Ctz64:
1920 ty = Ity_I64;
1921 xorOp = Iop_Xor64;
1922 subOp = Iop_Sub64;
1923 andOp = Iop_And64;
1924 one = mkU64(1);
1925 break;
1926 default:
1927 ppIROp(czop);
1928 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1929 }
1930
1931 // improver = atom ^ (atom - 1)
1932 //
1933 // That is, improver has its low ctz(atom) bits equal to one;
1934 // higher bits (if any) equal to zero.
1935 improver = assignNew('V', mce,ty,
1936 binop(xorOp,
1937 atom,
1938 assignNew('V', mce, ty,
1939 binop(subOp, atom, one))));
1940
1941 // improved = vatom & improver
1942 //
1943 // That is, treat any V bits above the first ctz(atom) bits as
1944 // "defined".
1945 improved = assignNew('V', mce, ty,
1946 binop(andOp, vatom, improver));
1947
1948 // Return pessimizing cast of improved.
1949 return mkPCastTo(mce, ty, improved);
1950}
1951
1952
sewardj95448072004-11-22 20:19:51 +00001953/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001954/*--- Scalar shifts. ---*/
1955/*------------------------------------------------------------*/
1956
1957/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1958 idea is to shift the definedness bits by the original shift amount.
1959 This introduces 0s ("defined") in new positions for left shifts and
1960 unsigned right shifts, and copies the top definedness bit for
1961 signed right shifts. So, conveniently, applying the original shift
1962 operator to the definedness bits for the left arg is exactly the
1963 right thing to do:
1964
1965 (qaa << bb)
1966
1967 However if the shift amount is undefined then the whole result
1968 is undefined. Hence need:
1969
1970 (qaa << bb) `UifU` PCast(qbb)
1971
1972 If the shift amount bb is a literal than qbb will say 'all defined'
1973 and the UifU and PCast will get folded out by post-instrumentation
1974 optimisation.
1975*/
1976static IRAtom* scalarShift ( MCEnv* mce,
1977 IRType ty,
1978 IROp original_op,
1979 IRAtom* qaa, IRAtom* qbb,
1980 IRAtom* aa, IRAtom* bb )
1981{
1982 tl_assert(isShadowAtom(mce,qaa));
1983 tl_assert(isShadowAtom(mce,qbb));
1984 tl_assert(isOriginalAtom(mce,aa));
1985 tl_assert(isOriginalAtom(mce,bb));
1986 tl_assert(sameKindedAtoms(qaa,aa));
1987 tl_assert(sameKindedAtoms(qbb,bb));
1988 return
1989 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001990 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001991 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001992 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001993 mkPCastTo(mce, ty, qbb)
1994 )
1995 );
1996}
1997
1998
1999/*------------------------------------------------------------*/
2000/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00002001/*------------------------------------------------------------*/
2002
sewardja1d93302004-12-12 16:45:06 +00002003/* Vector pessimisation -- pessimise within each lane individually. */
2004
2005static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2006{
sewardj7cf4e6b2008-05-01 20:24:26 +00002007 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00002008}
2009
2010static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2011{
sewardj7cf4e6b2008-05-01 20:24:26 +00002012 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00002013}
2014
2015static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2016{
sewardj7cf4e6b2008-05-01 20:24:26 +00002017 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00002018}
2019
2020static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2021{
sewardj7cf4e6b2008-05-01 20:24:26 +00002022 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00002023}
2024
sewardj350e8f72012-06-25 07:52:15 +00002025static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2026{
2027 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2028}
2029
2030static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2031{
2032 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2033}
2034
sewardjacd2e912005-01-13 19:17:06 +00002035static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2036{
sewardj7cf4e6b2008-05-01 20:24:26 +00002037 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00002038}
2039
sewardja2f30952013-03-27 11:40:02 +00002040static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2041{
2042 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2043}
2044
sewardjacd2e912005-01-13 19:17:06 +00002045static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2046{
sewardj7cf4e6b2008-05-01 20:24:26 +00002047 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00002048}
2049
sewardja2f30952013-03-27 11:40:02 +00002050static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2051{
2052 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2053}
2054
sewardjacd2e912005-01-13 19:17:06 +00002055static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2056{
sewardj7cf4e6b2008-05-01 20:24:26 +00002057 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00002058}
2059
sewardjc678b852010-09-22 00:58:51 +00002060static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2061{
2062 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2063}
2064
2065static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2066{
2067 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2068}
2069
sewardja1d93302004-12-12 16:45:06 +00002070
sewardj3245c912004-12-10 14:58:26 +00002071/* Here's a simple scheme capable of handling ops derived from SSE1
2072 code and while only generating ops that can be efficiently
2073 implemented in SSE1. */
2074
2075/* All-lanes versions are straightforward:
2076
sewardj20d38f22005-02-07 23:50:18 +00002077 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00002078
2079 unary32Fx4(x,y) ==> PCast32x4(x#)
2080
2081 Lowest-lane-only versions are more complex:
2082
sewardj20d38f22005-02-07 23:50:18 +00002083 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002084 x#,
sewardj20d38f22005-02-07 23:50:18 +00002085 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00002086 )
2087
2088 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00002089 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00002090 obvious scheme of taking the bottom 32 bits of each operand
2091 and doing a 32-bit UifU. Basically since UifU is fast and
2092 chopping lanes off vector values is slow.
2093
2094 Finally:
2095
sewardj20d38f22005-02-07 23:50:18 +00002096 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002097 x#,
sewardj20d38f22005-02-07 23:50:18 +00002098 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00002099 )
2100
2101 Where:
2102
2103 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2104 PCast32x4(v#) = CmpNEZ32x4(v#)
2105*/
2106
2107static
2108IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2109{
2110 IRAtom* at;
2111 tl_assert(isShadowAtom(mce, vatomX));
2112 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002113 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002114 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002115 return at;
2116}
2117
2118static
2119IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2120{
2121 IRAtom* at;
2122 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002123 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002124 return at;
2125}
2126
2127static
2128IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2129{
2130 IRAtom* at;
2131 tl_assert(isShadowAtom(mce, vatomX));
2132 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002133 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002134 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002135 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002136 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002137 return at;
2138}
2139
2140static
2141IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2142{
2143 IRAtom* at;
2144 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002145 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002146 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002147 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002148 return at;
2149}
2150
sewardj0b070592004-12-10 21:44:22 +00002151/* --- ... and ... 64Fx2 versions of the same ... --- */
2152
2153static
2154IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2155{
2156 IRAtom* at;
2157 tl_assert(isShadowAtom(mce, vatomX));
2158 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002159 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002160 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002161 return at;
2162}
2163
2164static
2165IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2166{
2167 IRAtom* at;
2168 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002169 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002170 return at;
2171}
2172
2173static
2174IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2175{
2176 IRAtom* at;
2177 tl_assert(isShadowAtom(mce, vatomX));
2178 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002179 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002180 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002181 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002182 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002183 return at;
2184}
2185
2186static
2187IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2188{
2189 IRAtom* at;
2190 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002191 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002192 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002193 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002194 return at;
2195}
2196
sewardj57f92b02010-08-22 11:54:14 +00002197/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2198
2199static
2200IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2201{
2202 IRAtom* at;
2203 tl_assert(isShadowAtom(mce, vatomX));
2204 tl_assert(isShadowAtom(mce, vatomY));
2205 at = mkUifU64(mce, vatomX, vatomY);
2206 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2207 return at;
2208}
2209
2210static
2211IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2212{
2213 IRAtom* at;
2214 tl_assert(isShadowAtom(mce, vatomX));
2215 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2216 return at;
2217}
2218
sewardj350e8f72012-06-25 07:52:15 +00002219/* --- ... and ... 64Fx4 versions of the same ... --- */
2220
2221static
2222IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2223{
2224 IRAtom* at;
2225 tl_assert(isShadowAtom(mce, vatomX));
2226 tl_assert(isShadowAtom(mce, vatomY));
2227 at = mkUifUV256(mce, vatomX, vatomY);
2228 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2229 return at;
2230}
2231
2232static
2233IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2234{
2235 IRAtom* at;
2236 tl_assert(isShadowAtom(mce, vatomX));
2237 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2238 return at;
2239}
2240
2241/* --- ... and ... 32Fx8 versions of the same ... --- */
2242
2243static
2244IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2245{
2246 IRAtom* at;
2247 tl_assert(isShadowAtom(mce, vatomX));
2248 tl_assert(isShadowAtom(mce, vatomY));
2249 at = mkUifUV256(mce, vatomX, vatomY);
2250 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2251 return at;
2252}
2253
2254static
2255IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2256{
2257 IRAtom* at;
2258 tl_assert(isShadowAtom(mce, vatomX));
2259 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2260 return at;
2261}
2262
sewardj1eb272f2014-01-26 18:36:52 +00002263/* --- 64Fx2 binary FP ops, with rounding mode --- */
2264
2265static
2266IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2267 IRAtom* vatomX, IRAtom* vatomY )
2268{
2269 /* This is the same as binary64Fx2, except that we subsequently
2270 pessimise vRM (definedness of the rounding mode), widen to 128
2271 bits and UifU it into the result. As with the scalar cases, if
2272 the RM is a constant then it is defined and so this extra bit
2273 will get constant-folded out later. */
2274 // "do" the vector args
2275 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2276 // PCast the RM, and widen it to 128 bits
2277 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2278 // Roll it into the result
2279 t1 = mkUifUV128(mce, t1, t2);
2280 return t1;
2281}
2282
2283/* --- ... and ... 32Fx4 versions of the same --- */
2284
2285static
2286IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2287 IRAtom* vatomX, IRAtom* vatomY )
2288{
2289 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2290 // PCast the RM, and widen it to 128 bits
2291 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2292 // Roll it into the result
2293 t1 = mkUifUV128(mce, t1, t2);
2294 return t1;
2295}
2296
2297/* --- ... and ... 64Fx4 versions of the same --- */
2298
2299static
2300IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2301 IRAtom* vatomX, IRAtom* vatomY )
2302{
2303 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2304 // PCast the RM, and widen it to 256 bits
2305 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2306 // Roll it into the result
2307 t1 = mkUifUV256(mce, t1, t2);
2308 return t1;
2309}
2310
2311/* --- ... and ... 32Fx8 versions of the same --- */
2312
2313static
2314IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2315 IRAtom* vatomX, IRAtom* vatomY )
2316{
2317 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2318 // PCast the RM, and widen it to 256 bits
2319 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2320 // Roll it into the result
2321 t1 = mkUifUV256(mce, t1, t2);
2322 return t1;
2323}
2324
2325
sewardja1d93302004-12-12 16:45:06 +00002326/* --- --- Vector saturated narrowing --- --- */
2327
sewardjb5a29232011-10-22 09:29:41 +00002328/* We used to do something very clever here, but on closer inspection
2329 (2011-Jun-15), and in particular bug #279698, it turns out to be
2330 wrong. Part of the problem came from the fact that for a long
2331 time, the IR primops to do with saturated narrowing were
2332 underspecified and managed to confuse multiple cases which needed
2333 to be separate: the op names had a signedness qualifier, but in
2334 fact the source and destination signednesses needed to be specified
2335 independently, so the op names really need two independent
2336 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002337
sewardjb5a29232011-10-22 09:29:41 +00002338 As of 2011-Jun-15 (ish) the underspecification was sorted out
2339 properly. The incorrect instrumentation remained, though. That
2340 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002341
sewardjb5a29232011-10-22 09:29:41 +00002342 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002343
sewardjb5a29232011-10-22 09:29:41 +00002344 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2345 number of lanes, X is the source lane width and signedness, and Y
2346 is the destination lane width and signedness. In all cases the
2347 destination lane width is half the source lane width, so the names
2348 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002349
sewardjb5a29232011-10-22 09:29:41 +00002350 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2351 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002352
sewardjb5a29232011-10-22 09:29:41 +00002353 Let Vanilla(OP) be a function that takes OP, one of these
2354 saturating narrowing ops, and produces the same "shaped" narrowing
2355 op which is not saturating, but merely dumps the most significant
2356 bits. "same shape" means that the lane numbers and widths are the
2357 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002358
sewardjb5a29232011-10-22 09:29:41 +00002359 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2360 = Iop_NarrowBin32to16x8,
2361 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2362 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002363
sewardjb5a29232011-10-22 09:29:41 +00002364 So, with that in place, the scheme is simple, and it is simple to
2365 pessimise each lane individually and then apply Vanilla(OP) so as
2366 to get the result in the right "shape". If the original OP is
2367 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002368
sewardjb5a29232011-10-22 09:29:41 +00002369 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002370
sewardjb5a29232011-10-22 09:29:41 +00002371 or for the case when OP is unary (Iop_QNarrowUn*)
2372
2373 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002374*/
2375static
sewardjb5a29232011-10-22 09:29:41 +00002376IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2377{
2378 switch (qnarrowOp) {
2379 /* Binary: (128, 128) -> 128 */
2380 case Iop_QNarrowBin16Sto8Ux16:
2381 case Iop_QNarrowBin16Sto8Sx16:
2382 case Iop_QNarrowBin16Uto8Ux16:
carll62770672013-10-01 15:50:09 +00002383 case Iop_QNarrowBin64Sto32Sx4:
2384 case Iop_QNarrowBin64Uto32Ux4:
sewardjb5a29232011-10-22 09:29:41 +00002385 return Iop_NarrowBin16to8x16;
2386 case Iop_QNarrowBin32Sto16Ux8:
2387 case Iop_QNarrowBin32Sto16Sx8:
2388 case Iop_QNarrowBin32Uto16Ux8:
2389 return Iop_NarrowBin32to16x8;
2390 /* Binary: (64, 64) -> 64 */
2391 case Iop_QNarrowBin32Sto16Sx4:
2392 return Iop_NarrowBin32to16x4;
2393 case Iop_QNarrowBin16Sto8Ux8:
2394 case Iop_QNarrowBin16Sto8Sx8:
2395 return Iop_NarrowBin16to8x8;
2396 /* Unary: 128 -> 64 */
2397 case Iop_QNarrowUn64Uto32Ux2:
2398 case Iop_QNarrowUn64Sto32Sx2:
2399 case Iop_QNarrowUn64Sto32Ux2:
2400 return Iop_NarrowUn64to32x2;
2401 case Iop_QNarrowUn32Uto16Ux4:
2402 case Iop_QNarrowUn32Sto16Sx4:
2403 case Iop_QNarrowUn32Sto16Ux4:
2404 return Iop_NarrowUn32to16x4;
2405 case Iop_QNarrowUn16Uto8Ux8:
2406 case Iop_QNarrowUn16Sto8Sx8:
2407 case Iop_QNarrowUn16Sto8Ux8:
2408 return Iop_NarrowUn16to8x8;
2409 default:
2410 ppIROp(qnarrowOp);
2411 VG_(tool_panic)("vanillaNarrowOpOfShape");
2412 }
2413}
2414
2415static
sewardj7ee7d852011-06-16 11:37:21 +00002416IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2417 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002418{
2419 IRAtom *at1, *at2, *at3;
2420 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2421 switch (narrow_op) {
carll62770672013-10-01 15:50:09 +00002422 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2423 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
sewardj7ee7d852011-06-16 11:37:21 +00002424 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2425 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2426 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2427 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2428 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2429 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2430 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002431 }
sewardjb5a29232011-10-22 09:29:41 +00002432 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002433 tl_assert(isShadowAtom(mce,vatom1));
2434 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002435 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2436 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002437 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002438 return at3;
2439}
2440
sewardjacd2e912005-01-13 19:17:06 +00002441static
sewardj7ee7d852011-06-16 11:37:21 +00002442IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2443 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002444{
2445 IRAtom *at1, *at2, *at3;
2446 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2447 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002448 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2449 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2450 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2451 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002452 }
sewardjb5a29232011-10-22 09:29:41 +00002453 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002454 tl_assert(isShadowAtom(mce,vatom1));
2455 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002456 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2457 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002458 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002459 return at3;
2460}
2461
/* Instrument a unary (V128 -> I64) narrowing op.  Non-saturating
   narrows just dump top bits, so the op can be applied directly to
   the shadow value; saturating narrows need each source lane
   pessimised first (see comment above vanillaNarrowingOpOfShape). */
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
                             IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   tl_assert(isShadowAtom(mce,vatom1));
   /* For vanilla narrowing (non-saturating), we can just apply
      the op directly to the V bits. */
   switch (narrow_op) {
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
         return at1;
      default:
         break; /* Do Plan B */
   }
   /* Plan B: for ops that involve a saturation operation on the args,
      we must PCast before the vanilla narrow. */
   switch (narrow_op) {
      /* PCast width matches the *source* lane width, so any undefined
         bit taints the whole lane whose saturation it influences. */
      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorNarrowUnV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   return at2;
}
2499
/* Instrument a unary (I64 -> V128) widening op.  Widen the shadow
   bits with the original op, then pessimise each *destination* lane
   so a partially-undefined widened lane becomes wholly undefined. */
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
2520
sewardja1d93302004-12-12 16:45:06 +00002521
2522/* --- --- Vector integer arithmetic --- --- */
2523
2524/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002525
sewardja2f30952013-03-27 11:40:02 +00002526/* --- V256-bit versions --- */
2527
2528static
2529IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2530{
2531 IRAtom* at;
2532 at = mkUifUV256(mce, vatom1, vatom2);
2533 at = mkPCast8x32(mce, at);
2534 return at;
2535}
2536
2537static
2538IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2539{
2540 IRAtom* at;
2541 at = mkUifUV256(mce, vatom1, vatom2);
2542 at = mkPCast16x16(mce, at);
2543 return at;
2544}
2545
2546static
2547IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2548{
2549 IRAtom* at;
2550 at = mkUifUV256(mce, vatom1, vatom2);
2551 at = mkPCast32x8(mce, at);
2552 return at;
2553}
2554
2555static
2556IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2557{
2558 IRAtom* at;
2559 at = mkUifUV256(mce, vatom1, vatom2);
2560 at = mkPCast64x4(mce, at);
2561 return at;
2562}
2563
sewardj20d38f22005-02-07 23:50:18 +00002564/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002565
sewardja1d93302004-12-12 16:45:06 +00002566static
2567IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2568{
2569 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002570 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002571 at = mkPCast8x16(mce, at);
2572 return at;
2573}
2574
2575static
2576IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2577{
2578 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002579 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002580 at = mkPCast16x8(mce, at);
2581 return at;
2582}
2583
2584static
2585IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2586{
2587 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002588 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002589 at = mkPCast32x4(mce, at);
2590 return at;
2591}
2592
2593static
2594IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2595{
2596 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002597 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002598 at = mkPCast64x2(mce, at);
2599 return at;
2600}
sewardj3245c912004-12-10 14:58:26 +00002601
sewardjacd2e912005-01-13 19:17:06 +00002602/* --- 64-bit versions --- */
2603
2604static
2605IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2606{
2607 IRAtom* at;
2608 at = mkUifU64(mce, vatom1, vatom2);
2609 at = mkPCast8x8(mce, at);
2610 return at;
2611}
2612
2613static
2614IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2615{
2616 IRAtom* at;
2617 at = mkUifU64(mce, vatom1, vatom2);
2618 at = mkPCast16x4(mce, at);
2619 return at;
2620}
2621
2622static
2623IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2624{
2625 IRAtom* at;
2626 at = mkUifU64(mce, vatom1, vatom2);
2627 at = mkPCast32x2(mce, at);
2628 return at;
2629}
2630
sewardj57f92b02010-08-22 11:54:14 +00002631static
2632IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2633{
2634 IRAtom* at;
2635 at = mkUifU64(mce, vatom1, vatom2);
2636 at = mkPCastTo(mce, Ity_I64, at);
2637 return at;
2638}
2639
sewardjc678b852010-09-22 00:58:51 +00002640/* --- 32-bit versions --- */
2641
2642static
2643IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2644{
2645 IRAtom* at;
2646 at = mkUifU32(mce, vatom1, vatom2);
2647 at = mkPCast8x4(mce, at);
2648 return at;
2649}
2650
2651static
2652IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2653{
2654 IRAtom* at;
2655 at = mkUifU32(mce, vatom1, vatom2);
2656 at = mkPCast16x2(mce, at);
2657 return at;
2658}
2659
sewardj3245c912004-12-10 14:58:26 +00002660
2661/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002662/*--- Generate shadow values from all kinds of IRExprs. ---*/
2663/*------------------------------------------------------------*/
2664
/* Compute the shadow (V-bit) value for a quaternary (4-operand) IR
   expression.  Fused multiply-add/sub variants are handled lazily
   (any undefinedness in any arg taints the whole result); the V256
   data-steering op just applies itself to the shadow operands.
   Panics on any other op. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Shadow values for all four operands. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2711
2712
/* Compute the shadow (V-bit) value for a ternary IR expression.
   Scalar FP/DFP ops (first arg is the rounding mode) are handled
   lazily via mkLazy3; shift/extract-style ops whose immediate operand
   must be fully defined get complainIfUndefined on that operand; and
   vector FP ops with a rounding-mode first arg are routed to the
   *_w_rm helpers above.  Panics on any other op. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32; shadow of an F32 is an I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      /* The lane-selector / shift-amount operand must be wholly
         defined, since it steers the whole computation; complain if
         not, then use the original (not shadow) value for it. */
      case Iop_ExtractV128:
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      /* BCDIops */
      case Iop_BCDAdd:
      case Iop_BCDSub:
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));

      /* Vector FP with rounding mode as the first arg */
      case Iop_Add64Fx2:
      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Div64Fx2:
         return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Div32Fx4:
         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add64Fx4:
      case Iop_Sub64Fx4:
      case Iop_Mul64Fx4:
      case Iop_Div64Fx4:
         return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx8:
      case Iop_Sub32Fx8:
      case Iop_Mul32Fx8:
      case Iop_Div32Fx8:
         return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2827
2828
2829static
sewardj95448072004-11-22 20:19:51 +00002830IRAtom* expr2vbits_Binop ( MCEnv* mce,
2831 IROp op,
2832 IRAtom* atom1, IRAtom* atom2 )
2833{
2834 IRType and_or_ty;
2835 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2836 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2837 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2838
2839 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2840 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2841
2842 tl_assert(isOriginalAtom(mce,atom1));
2843 tl_assert(isOriginalAtom(mce,atom2));
2844 tl_assert(isShadowAtom(mce,vatom1));
2845 tl_assert(isShadowAtom(mce,vatom2));
2846 tl_assert(sameKindedAtoms(atom1,vatom1));
2847 tl_assert(sameKindedAtoms(atom2,vatom2));
2848 switch (op) {
2849
sewardjc678b852010-09-22 00:58:51 +00002850 /* 32-bit SIMD */
2851
2852 case Iop_Add16x2:
2853 case Iop_HAdd16Ux2:
2854 case Iop_HAdd16Sx2:
2855 case Iop_Sub16x2:
2856 case Iop_HSub16Ux2:
2857 case Iop_HSub16Sx2:
2858 case Iop_QAdd16Sx2:
2859 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002860 case Iop_QSub16Ux2:
sewardj7a370652013-07-04 20:37:33 +00002861 case Iop_QAdd16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002862 return binary16Ix2(mce, vatom1, vatom2);
2863
2864 case Iop_Add8x4:
2865 case Iop_HAdd8Ux4:
2866 case Iop_HAdd8Sx4:
2867 case Iop_Sub8x4:
2868 case Iop_HSub8Ux4:
2869 case Iop_HSub8Sx4:
2870 case Iop_QSub8Ux4:
2871 case Iop_QAdd8Ux4:
2872 case Iop_QSub8Sx4:
2873 case Iop_QAdd8Sx4:
2874 return binary8Ix4(mce, vatom1, vatom2);
2875
sewardjacd2e912005-01-13 19:17:06 +00002876 /* 64-bit SIMD */
2877
sewardj57f92b02010-08-22 11:54:14 +00002878 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002879 case Iop_ShrN16x4:
2880 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002881 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002882 case Iop_SarN16x4:
2883 case Iop_SarN32x2:
2884 case Iop_ShlN16x4:
2885 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002886 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002887 /* Same scheme as with all other shifts. */
sewardjb9e6d242013-05-11 13:42:08 +00002888 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002889 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002890
sewardj7ee7d852011-06-16 11:37:21 +00002891 case Iop_QNarrowBin32Sto16Sx4:
2892 case Iop_QNarrowBin16Sto8Sx8:
2893 case Iop_QNarrowBin16Sto8Ux8:
2894 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002895
2896 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002897 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002898 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002899 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002900 case Iop_Avg8Ux8:
2901 case Iop_QSub8Sx8:
2902 case Iop_QSub8Ux8:
2903 case Iop_Sub8x8:
2904 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002905 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002906 case Iop_CmpEQ8x8:
2907 case Iop_QAdd8Sx8:
2908 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002909 case Iop_QSal8x8:
2910 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002911 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002912 case Iop_Mul8x8:
2913 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002914 return binary8Ix8(mce, vatom1, vatom2);
2915
2916 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002917 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002918 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002919 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002920 case Iop_Avg16Ux4:
2921 case Iop_QSub16Ux4:
2922 case Iop_QSub16Sx4:
2923 case Iop_Sub16x4:
2924 case Iop_Mul16x4:
2925 case Iop_MulHi16Sx4:
2926 case Iop_MulHi16Ux4:
2927 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002928 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002929 case Iop_CmpEQ16x4:
2930 case Iop_QAdd16Sx4:
2931 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002932 case Iop_QSal16x4:
2933 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002934 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002935 case Iop_QDMulHi16Sx4:
2936 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002937 return binary16Ix4(mce, vatom1, vatom2);
2938
2939 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002940 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002941 case Iop_Max32Sx2:
2942 case Iop_Max32Ux2:
2943 case Iop_Min32Sx2:
2944 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002945 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002946 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002947 case Iop_CmpEQ32x2:
2948 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002949 case Iop_QAdd32Ux2:
2950 case Iop_QAdd32Sx2:
2951 case Iop_QSub32Ux2:
2952 case Iop_QSub32Sx2:
2953 case Iop_QSal32x2:
2954 case Iop_QShl32x2:
2955 case Iop_QDMulHi32Sx2:
2956 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002957 return binary32Ix2(mce, vatom1, vatom2);
2958
sewardj57f92b02010-08-22 11:54:14 +00002959 case Iop_QSub64Ux1:
2960 case Iop_QSub64Sx1:
2961 case Iop_QAdd64Ux1:
2962 case Iop_QAdd64Sx1:
2963 case Iop_QSal64x1:
2964 case Iop_QShl64x1:
2965 case Iop_Sal64x1:
2966 return binary64Ix1(mce, vatom1, vatom2);
2967
2968 case Iop_QShlN8Sx8:
2969 case Iop_QShlN8x8:
2970 case Iop_QSalN8x8:
sewardjb9e6d242013-05-11 13:42:08 +00002971 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002972 return mkPCast8x8(mce, vatom1);
2973
2974 case Iop_QShlN16Sx4:
2975 case Iop_QShlN16x4:
2976 case Iop_QSalN16x4:
sewardjb9e6d242013-05-11 13:42:08 +00002977 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002978 return mkPCast16x4(mce, vatom1);
2979
2980 case Iop_QShlN32Sx2:
2981 case Iop_QShlN32x2:
2982 case Iop_QSalN32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002983 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002984 return mkPCast32x2(mce, vatom1);
2985
2986 case Iop_QShlN64Sx1:
2987 case Iop_QShlN64x1:
2988 case Iop_QSalN64x1:
sewardjb9e6d242013-05-11 13:42:08 +00002989 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002990 return mkPCast32x2(mce, vatom1);
2991
2992 case Iop_PwMax32Sx2:
2993 case Iop_PwMax32Ux2:
2994 case Iop_PwMin32Sx2:
2995 case Iop_PwMin32Ux2:
2996 case Iop_PwMax32Fx2:
2997 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002998 return assignNew('V', mce, Ity_I64,
2999 binop(Iop_PwMax32Ux2,
3000 mkPCast32x2(mce, vatom1),
3001 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003002
3003 case Iop_PwMax16Sx4:
3004 case Iop_PwMax16Ux4:
3005 case Iop_PwMin16Sx4:
3006 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00003007 return assignNew('V', mce, Ity_I64,
3008 binop(Iop_PwMax16Ux4,
3009 mkPCast16x4(mce, vatom1),
3010 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003011
3012 case Iop_PwMax8Sx8:
3013 case Iop_PwMax8Ux8:
3014 case Iop_PwMin8Sx8:
3015 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00003016 return assignNew('V', mce, Ity_I64,
3017 binop(Iop_PwMax8Ux8,
3018 mkPCast8x8(mce, vatom1),
3019 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003020
3021 case Iop_PwAdd32x2:
3022 case Iop_PwAdd32Fx2:
3023 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00003024 assignNew('V', mce, Ity_I64,
3025 binop(Iop_PwAdd32x2,
3026 mkPCast32x2(mce, vatom1),
3027 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003028
3029 case Iop_PwAdd16x4:
3030 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00003031 assignNew('V', mce, Ity_I64,
3032 binop(op, mkPCast16x4(mce, vatom1),
3033 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003034
3035 case Iop_PwAdd8x8:
3036 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00003037 assignNew('V', mce, Ity_I64,
3038 binop(op, mkPCast8x8(mce, vatom1),
3039 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003040
3041 case Iop_Shl8x8:
3042 case Iop_Shr8x8:
3043 case Iop_Sar8x8:
3044 case Iop_Sal8x8:
3045 return mkUifU64(mce,
3046 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3047 mkPCast8x8(mce,vatom2)
3048 );
3049
3050 case Iop_Shl16x4:
3051 case Iop_Shr16x4:
3052 case Iop_Sar16x4:
3053 case Iop_Sal16x4:
3054 return mkUifU64(mce,
3055 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3056 mkPCast16x4(mce,vatom2)
3057 );
3058
3059 case Iop_Shl32x2:
3060 case Iop_Shr32x2:
3061 case Iop_Sar32x2:
3062 case Iop_Sal32x2:
3063 return mkUifU64(mce,
3064 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3065 mkPCast32x2(mce,vatom2)
3066 );
3067
sewardjacd2e912005-01-13 19:17:06 +00003068 /* 64-bit data-steering */
3069 case Iop_InterleaveLO32x2:
3070 case Iop_InterleaveLO16x4:
3071 case Iop_InterleaveLO8x8:
3072 case Iop_InterleaveHI32x2:
3073 case Iop_InterleaveHI16x4:
3074 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00003075 case Iop_CatOddLanes8x8:
3076 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00003077 case Iop_CatOddLanes16x4:
3078 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00003079 case Iop_InterleaveOddLanes8x8:
3080 case Iop_InterleaveEvenLanes8x8:
3081 case Iop_InterleaveOddLanes16x4:
3082 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003083 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00003084
sewardj57f92b02010-08-22 11:54:14 +00003085 case Iop_GetElem8x8:
sewardjb9e6d242013-05-11 13:42:08 +00003086 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003087 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3088 case Iop_GetElem16x4:
sewardjb9e6d242013-05-11 13:42:08 +00003089 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003090 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3091 case Iop_GetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00003092 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003093 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3094
sewardj114a9172008-02-09 01:49:32 +00003095 /* Perm8x8: rearrange values in left arg using steering values
3096 from right arg. So rearrange the vbits in the same way but
3097 pessimise wrt steering values. */
3098 case Iop_Perm8x8:
3099 return mkUifU64(
3100 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003101 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00003102 mkPCast8x8(mce, vatom2)
3103 );
3104
sewardj20d38f22005-02-07 23:50:18 +00003105 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00003106
sewardj57f92b02010-08-22 11:54:14 +00003107 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00003108 case Iop_ShrN16x8:
3109 case Iop_ShrN32x4:
3110 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00003111 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00003112 case Iop_SarN16x8:
3113 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00003114 case Iop_SarN64x2:
3115 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00003116 case Iop_ShlN16x8:
3117 case Iop_ShlN32x4:
3118 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00003119 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3120 this is wrong now, scalar shifts are done properly lazily.
3121 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003122 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003123 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00003124
sewardjcbf8be72005-11-10 18:34:41 +00003125 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00003126 case Iop_Shl8x16:
3127 case Iop_Shr8x16:
3128 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00003129 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00003130 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00003131 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003132 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003133 mkPCast8x16(mce,vatom2)
3134 );
3135
3136 case Iop_Shl16x8:
3137 case Iop_Shr16x8:
3138 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00003139 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00003140 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00003141 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003142 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003143 mkPCast16x8(mce,vatom2)
3144 );
3145
3146 case Iop_Shl32x4:
3147 case Iop_Shr32x4:
3148 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00003149 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00003150 case Iop_Rol32x4:
carll62770672013-10-01 15:50:09 +00003151 case Iop_Rol64x2:
sewardj43d60752005-11-10 18:13:01 +00003152 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003153 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003154 mkPCast32x4(mce,vatom2)
3155 );
3156
sewardj57f92b02010-08-22 11:54:14 +00003157 case Iop_Shl64x2:
3158 case Iop_Shr64x2:
3159 case Iop_Sar64x2:
3160 case Iop_Sal64x2:
3161 return mkUifUV128(mce,
3162 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3163 mkPCast64x2(mce,vatom2)
3164 );
3165
3166 case Iop_F32ToFixed32Ux4_RZ:
3167 case Iop_F32ToFixed32Sx4_RZ:
3168 case Iop_Fixed32UToF32x4_RN:
3169 case Iop_Fixed32SToF32x4_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003170 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003171 return mkPCast32x4(mce, vatom1);
3172
3173 case Iop_F32ToFixed32Ux2_RZ:
3174 case Iop_F32ToFixed32Sx2_RZ:
3175 case Iop_Fixed32UToF32x2_RN:
3176 case Iop_Fixed32SToF32x2_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003177 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003178 return mkPCast32x2(mce, vatom1);
3179
sewardja1d93302004-12-12 16:45:06 +00003180 case Iop_QSub8Ux16:
3181 case Iop_QSub8Sx16:
3182 case Iop_Sub8x16:
3183 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003184 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003185 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003186 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003187 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00003188 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00003189 case Iop_CmpEQ8x16:
3190 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003191 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003192 case Iop_QAdd8Ux16:
3193 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00003194 case Iop_QSal8x16:
3195 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00003196 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00003197 case Iop_Mul8x16:
3198 case Iop_PolynomialMul8x16:
carll24e40de2013-10-15 18:13:21 +00003199 case Iop_PolynomialMulAdd8x16:
sewardja1d93302004-12-12 16:45:06 +00003200 return binary8Ix16(mce, vatom1, vatom2);
3201
3202 case Iop_QSub16Ux8:
3203 case Iop_QSub16Sx8:
3204 case Iop_Sub16x8:
3205 case Iop_Mul16x8:
3206 case Iop_MulHi16Sx8:
3207 case Iop_MulHi16Ux8:
3208 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003209 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003210 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003211 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003212 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003213 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003214 case Iop_CmpEQ16x8:
3215 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003216 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003217 case Iop_QAdd16Ux8:
3218 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00003219 case Iop_QSal16x8:
3220 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00003221 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00003222 case Iop_QDMulHi16Sx8:
3223 case Iop_QRDMulHi16Sx8:
carll24e40de2013-10-15 18:13:21 +00003224 case Iop_PolynomialMulAdd16x8:
sewardja1d93302004-12-12 16:45:06 +00003225 return binary16Ix8(mce, vatom1, vatom2);
3226
3227 case Iop_Sub32x4:
3228 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00003229 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00003230 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00003231 case Iop_QAdd32Sx4:
3232 case Iop_QAdd32Ux4:
3233 case Iop_QSub32Sx4:
3234 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00003235 case Iop_QSal32x4:
3236 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00003237 case Iop_Avg32Ux4:
3238 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003239 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00003240 case Iop_Max32Ux4:
3241 case Iop_Max32Sx4:
3242 case Iop_Min32Ux4:
3243 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003244 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003245 case Iop_QDMulHi32Sx4:
3246 case Iop_QRDMulHi32Sx4:
carll24e40de2013-10-15 18:13:21 +00003247 case Iop_PolynomialMulAdd32x4:
sewardja1d93302004-12-12 16:45:06 +00003248 return binary32Ix4(mce, vatom1, vatom2);
3249
3250 case Iop_Sub64x2:
3251 case Iop_Add64x2:
carll62770672013-10-01 15:50:09 +00003252 case Iop_Max64Sx2:
3253 case Iop_Max64Ux2:
3254 case Iop_Min64Sx2:
3255 case Iop_Min64Ux2:
sewardj9a2afe92011-10-19 15:24:55 +00003256 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003257 case Iop_CmpGT64Sx2:
carll62770672013-10-01 15:50:09 +00003258 case Iop_CmpGT64Ux2:
sewardj57f92b02010-08-22 11:54:14 +00003259 case Iop_QSal64x2:
3260 case Iop_QShl64x2:
3261 case Iop_QAdd64Ux2:
3262 case Iop_QAdd64Sx2:
3263 case Iop_QSub64Ux2:
3264 case Iop_QSub64Sx2:
carll24e40de2013-10-15 18:13:21 +00003265 case Iop_PolynomialMulAdd64x2:
3266 case Iop_CipherV128:
3267 case Iop_CipherLV128:
3268 case Iop_NCipherV128:
3269 case Iop_NCipherLV128:
3270 return binary64Ix2(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003271
carll62770672013-10-01 15:50:09 +00003272 case Iop_QNarrowBin64Sto32Sx4:
3273 case Iop_QNarrowBin64Uto32Ux4:
sewardj7ee7d852011-06-16 11:37:21 +00003274 case Iop_QNarrowBin32Sto16Sx8:
3275 case Iop_QNarrowBin32Uto16Ux8:
3276 case Iop_QNarrowBin32Sto16Ux8:
3277 case Iop_QNarrowBin16Sto8Sx16:
3278 case Iop_QNarrowBin16Uto8Ux16:
3279 case Iop_QNarrowBin16Sto8Ux16:
3280 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003281
sewardj0b070592004-12-10 21:44:22 +00003282 case Iop_Min64Fx2:
3283 case Iop_Max64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003284 case Iop_CmpLT64Fx2:
3285 case Iop_CmpLE64Fx2:
3286 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003287 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003288 return binary64Fx2(mce, vatom1, vatom2);
3289
3290 case Iop_Sub64F0x2:
3291 case Iop_Mul64F0x2:
3292 case Iop_Min64F0x2:
3293 case Iop_Max64F0x2:
3294 case Iop_Div64F0x2:
3295 case Iop_CmpLT64F0x2:
3296 case Iop_CmpLE64F0x2:
3297 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003298 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003299 case Iop_Add64F0x2:
3300 return binary64F0x2(mce, vatom1, vatom2);
3301
sewardj170ee212004-12-10 18:57:51 +00003302 case Iop_Min32Fx4:
3303 case Iop_Max32Fx4:
sewardj170ee212004-12-10 18:57:51 +00003304 case Iop_CmpLT32Fx4:
3305 case Iop_CmpLE32Fx4:
3306 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003307 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003308 case Iop_CmpGT32Fx4:
3309 case Iop_CmpGE32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00003310 case Iop_Recps32Fx4:
3311 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003312 return binary32Fx4(mce, vatom1, vatom2);
3313
sewardj57f92b02010-08-22 11:54:14 +00003314 case Iop_Sub32Fx2:
3315 case Iop_Mul32Fx2:
3316 case Iop_Min32Fx2:
3317 case Iop_Max32Fx2:
3318 case Iop_CmpEQ32Fx2:
3319 case Iop_CmpGT32Fx2:
3320 case Iop_CmpGE32Fx2:
3321 case Iop_Add32Fx2:
3322 case Iop_Recps32Fx2:
3323 case Iop_Rsqrts32Fx2:
3324 return binary32Fx2(mce, vatom1, vatom2);
3325
sewardj170ee212004-12-10 18:57:51 +00003326 case Iop_Sub32F0x4:
3327 case Iop_Mul32F0x4:
3328 case Iop_Min32F0x4:
3329 case Iop_Max32F0x4:
3330 case Iop_Div32F0x4:
3331 case Iop_CmpLT32F0x4:
3332 case Iop_CmpLE32F0x4:
3333 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003334 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003335 case Iop_Add32F0x4:
3336 return binary32F0x4(mce, vatom1, vatom2);
3337
sewardj57f92b02010-08-22 11:54:14 +00003338 case Iop_QShlN8Sx16:
3339 case Iop_QShlN8x16:
3340 case Iop_QSalN8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003341 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003342 return mkPCast8x16(mce, vatom1);
3343
3344 case Iop_QShlN16Sx8:
3345 case Iop_QShlN16x8:
3346 case Iop_QSalN16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003347 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003348 return mkPCast16x8(mce, vatom1);
3349
3350 case Iop_QShlN32Sx4:
3351 case Iop_QShlN32x4:
3352 case Iop_QSalN32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003353 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003354 return mkPCast32x4(mce, vatom1);
3355
3356 case Iop_QShlN64Sx2:
3357 case Iop_QShlN64x2:
3358 case Iop_QSalN64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003359 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003360 return mkPCast32x4(mce, vatom1);
3361
3362 case Iop_Mull32Sx2:
3363 case Iop_Mull32Ux2:
3364 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003365 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3366 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003367
3368 case Iop_Mull16Sx4:
3369 case Iop_Mull16Ux4:
3370 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003371 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3372 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003373
3374 case Iop_Mull8Sx8:
3375 case Iop_Mull8Ux8:
3376 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003377 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3378 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003379
3380 case Iop_PwAdd32x4:
3381 return mkPCast32x4(mce,
3382 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3383 mkPCast32x4(mce, vatom2))));
3384
3385 case Iop_PwAdd16x8:
3386 return mkPCast16x8(mce,
3387 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3388 mkPCast16x8(mce, vatom2))));
3389
3390 case Iop_PwAdd8x16:
3391 return mkPCast8x16(mce,
3392 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3393 mkPCast8x16(mce, vatom2))));
3394
sewardj20d38f22005-02-07 23:50:18 +00003395 /* V128-bit data-steering */
3396 case Iop_SetV128lo32:
3397 case Iop_SetV128lo64:
3398 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003399 case Iop_InterleaveLO64x2:
3400 case Iop_InterleaveLO32x4:
3401 case Iop_InterleaveLO16x8:
3402 case Iop_InterleaveLO8x16:
3403 case Iop_InterleaveHI64x2:
3404 case Iop_InterleaveHI32x4:
3405 case Iop_InterleaveHI16x8:
3406 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003407 case Iop_CatOddLanes8x16:
3408 case Iop_CatOddLanes16x8:
3409 case Iop_CatOddLanes32x4:
3410 case Iop_CatEvenLanes8x16:
3411 case Iop_CatEvenLanes16x8:
3412 case Iop_CatEvenLanes32x4:
3413 case Iop_InterleaveOddLanes8x16:
3414 case Iop_InterleaveOddLanes16x8:
3415 case Iop_InterleaveOddLanes32x4:
3416 case Iop_InterleaveEvenLanes8x16:
3417 case Iop_InterleaveEvenLanes16x8:
3418 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003419 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003420
3421 case Iop_GetElem8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003422 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003423 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3424 case Iop_GetElem16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003425 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003426 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3427 case Iop_GetElem32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003428 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003429 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3430 case Iop_GetElem64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003431 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003432 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3433
sewardj620eb5b2005-10-22 12:50:43 +00003434 /* Perm8x16: rearrange values in left arg using steering values
3435 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003436 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003437 case Iop_Perm8x16:
3438 return mkUifUV128(
3439 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003440 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003441 mkPCast8x16(mce, vatom2)
3442 );
sewardj350e8f72012-06-25 07:52:15 +00003443 case Iop_Perm32x4:
3444 return mkUifUV128(
3445 mce,
3446 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3447 mkPCast32x4(mce, vatom2)
3448 );
sewardj170ee212004-12-10 18:57:51 +00003449
sewardj43d60752005-11-10 18:13:01 +00003450 /* These two take the lower half of each 16-bit lane, sign/zero
3451 extend it to 32, and multiply together, producing a 32x4
3452 result (and implicitly ignoring half the operand bits). So
3453 treat it as a bunch of independent 16x8 operations, but then
3454 do 32-bit shifts left-right to copy the lower half results
3455 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3456 into the upper half of each result lane. */
3457 case Iop_MullEven16Ux8:
3458 case Iop_MullEven16Sx8: {
3459 IRAtom* at;
3460 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003461 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3462 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003463 return at;
3464 }
3465
3466 /* Same deal as Iop_MullEven16{S,U}x8 */
3467 case Iop_MullEven8Ux16:
3468 case Iop_MullEven8Sx16: {
3469 IRAtom* at;
3470 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003471 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3472 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003473 return at;
3474 }
3475
carll62770672013-10-01 15:50:09 +00003476 /* Same deal as Iop_MullEven16{S,U}x8 */
3477 case Iop_MullEven32Ux4:
3478 case Iop_MullEven32Sx4: {
3479 IRAtom* at;
3480 at = binary32Ix4(mce,vatom1,vatom2);
3481 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3482 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3483 return at;
3484 }
3485
sewardj43d60752005-11-10 18:13:01 +00003486 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3487 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3488 Simply apply same op to the V bits, since this really no more
3489 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003490 case Iop_NarrowBin32to16x8:
3491 case Iop_NarrowBin16to8x16:
carlldfbf2942013-08-12 18:04:22 +00003492 case Iop_NarrowBin64to32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003493 return assignNew('V', mce, Ity_V128,
3494 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003495
3496 case Iop_ShrV128:
3497 case Iop_ShlV128:
3498 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3499 this is wrong now, scalar shifts are done properly lazily.
3500 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003501 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003502 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003503
carll24e40de2013-10-15 18:13:21 +00003504 /* SHA Iops */
3505 case Iop_SHA256:
3506 case Iop_SHA512:
3507 complainIfUndefined(mce, atom2, NULL);
3508 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3509
sewardj69a13322005-04-23 01:14:51 +00003510 /* I128-bit data-steering */
3511 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003512 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003513
sewardj350e8f72012-06-25 07:52:15 +00003514 /* V256-bit SIMD */
3515
sewardj350e8f72012-06-25 07:52:15 +00003516 case Iop_Max64Fx4:
3517 case Iop_Min64Fx4:
3518 return binary64Fx4(mce, vatom1, vatom2);
3519
sewardj350e8f72012-06-25 07:52:15 +00003520 case Iop_Max32Fx8:
3521 case Iop_Min32Fx8:
3522 return binary32Fx8(mce, vatom1, vatom2);
3523
3524 /* V256-bit data-steering */
3525 case Iop_V128HLtoV256:
3526 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3527
sewardj3245c912004-12-10 14:58:26 +00003528 /* Scalar floating point */
3529
sewardjb5b87402011-03-07 16:05:35 +00003530 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003531 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003532 /* I32(rm) x F32 -> I64 */
3533 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3534
3535 case Iop_I64StoF32:
3536 /* I32(rm) x I64 -> F32 */
3537 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3538
sewardjed69fdb2006-02-03 16:12:27 +00003539 case Iop_RoundF64toInt:
3540 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003541 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003542 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003543 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003544 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003545 case Iop_SinF64:
3546 case Iop_CosF64:
3547 case Iop_TanF64:
3548 case Iop_2xm1F64:
3549 case Iop_SqrtF64:
3550 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003551 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3552
sewardjea8b02f2012-04-12 17:28:57 +00003553 case Iop_ShlD64:
3554 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003555 case Iop_RoundD64toInt:
florian054684f2013-06-06 21:21:46 +00003556 /* I32(rm) x D64 -> D64 */
sewardjea8b02f2012-04-12 17:28:57 +00003557 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3558
3559 case Iop_ShlD128:
3560 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003561 case Iop_RoundD128toInt:
florian054684f2013-06-06 21:21:46 +00003562 /* I32(rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003563 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3564
3565 case Iop_D64toI64S:
florian53eb2a02013-01-12 22:04:00 +00003566 case Iop_D64toI64U:
sewardjea8b02f2012-04-12 17:28:57 +00003567 case Iop_I64StoD64:
florian53eb2a02013-01-12 22:04:00 +00003568 case Iop_I64UtoD64:
florian054684f2013-06-06 21:21:46 +00003569 /* I32(rm) x I64/D64 -> D64/I64 */
sewardjea8b02f2012-04-12 17:28:57 +00003570 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3571
florianba5693c2013-06-17 19:04:24 +00003572 case Iop_F32toD32:
3573 case Iop_F64toD32:
3574 case Iop_F128toD32:
3575 case Iop_D32toF32:
3576 case Iop_D64toF32:
3577 case Iop_D128toF32:
3578 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3579 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3580
3581 case Iop_F32toD64:
florian39b08d82013-05-05 15:05:42 +00003582 case Iop_F64toD64:
florianba5693c2013-06-17 19:04:24 +00003583 case Iop_F128toD64:
3584 case Iop_D32toF64:
florian39b08d82013-05-05 15:05:42 +00003585 case Iop_D64toF64:
florian39b08d82013-05-05 15:05:42 +00003586 case Iop_D128toF64:
florianba5693c2013-06-17 19:04:24 +00003587 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
florian39b08d82013-05-05 15:05:42 +00003588 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3589
florianba5693c2013-06-17 19:04:24 +00003590 case Iop_F32toD128:
3591 case Iop_F64toD128:
florian39b08d82013-05-05 15:05:42 +00003592 case Iop_F128toD128:
florianba5693c2013-06-17 19:04:24 +00003593 case Iop_D32toF128:
3594 case Iop_D64toF128:
florian39b08d82013-05-05 15:05:42 +00003595 case Iop_D128toF128:
florianba5693c2013-06-17 19:04:24 +00003596 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
florian39b08d82013-05-05 15:05:42 +00003597 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3598
sewardjd376a762010-06-27 09:08:54 +00003599 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003600 case Iop_SqrtF32:
3601 /* I32(rm) x I32/F32 -> I32/F32 */
3602 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3603
sewardjb5b87402011-03-07 16:05:35 +00003604 case Iop_SqrtF128:
3605 /* I32(rm) x F128 -> F128 */
3606 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3607
3608 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003609 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003610 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003611 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003612 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3613 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3614
3615 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003616 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003617 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
florian733b4db2013-06-06 19:13:29 +00003618 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3619 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003620 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3621
3622 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003623 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003624 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
florian733b4db2013-06-06 19:13:29 +00003625 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3626 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3627 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003628 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3629
3630 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003631 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003632 return assignNew('V', mce, Ity_I128,
3633 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003634
sewardj59570ff2010-01-01 11:59:33 +00003635 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003636 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003637 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003638 case Iop_I64UtoF32:
florian53eb2a02013-01-12 22:04:00 +00003639 case Iop_D64toI32U:
3640 case Iop_D64toI32S:
3641 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
sewardj95448072004-11-22 20:19:51 +00003642 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3643
sewardjea8b02f2012-04-12 17:28:57 +00003644 case Iop_D64toD32:
florian054684f2013-06-06 21:21:46 +00003645 /* First arg is I32 (rounding mode), second is D64 (data). */
florianf4bed372012-12-21 04:25:10 +00003646 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
sewardjea8b02f2012-04-12 17:28:57 +00003647
sewardj06f96d02009-12-31 19:24:12 +00003648 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003649 /* First arg is I32 (rounding mode), second is F64 (data). */
3650 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3651
sewardj18c72fa2012-04-23 11:22:05 +00003652 case Iop_InsertExpD64:
3653 /* I64 x I64 -> D64 */
3654 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3655
3656 case Iop_InsertExpD128:
3657 /* I64 x I128 -> D128 */
3658 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3659
sewardjb5b87402011-03-07 16:05:35 +00003660 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003661 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003662 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003663 case Iop_CmpD64:
3664 case Iop_CmpD128:
florian29a36b92012-12-26 17:48:46 +00003665 case Iop_CmpExpD64:
3666 case Iop_CmpExpD128:
sewardj95448072004-11-22 20:19:51 +00003667 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3668
3669 /* non-FP after here */
3670
3671 case Iop_DivModU64to32:
3672 case Iop_DivModS64to32:
3673 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3674
sewardj69a13322005-04-23 01:14:51 +00003675 case Iop_DivModU128to64:
3676 case Iop_DivModS128to64:
3677 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3678
florian537ed2d2012-08-20 16:51:39 +00003679 case Iop_8HLto16:
3680 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003681 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003682 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003683 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003684 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003685
sewardjb5b87402011-03-07 16:05:35 +00003686 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003687 case Iop_MullS64:
3688 case Iop_MullU64: {
3689 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3690 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003691 return assignNew('V', mce, Ity_I128,
3692 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003693 }
3694
sewardj95448072004-11-22 20:19:51 +00003695 case Iop_MullS32:
3696 case Iop_MullU32: {
3697 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3698 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003699 return assignNew('V', mce, Ity_I64,
3700 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003701 }
3702
3703 case Iop_MullS16:
3704 case Iop_MullU16: {
3705 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3706 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003707 return assignNew('V', mce, Ity_I32,
3708 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003709 }
3710
3711 case Iop_MullS8:
3712 case Iop_MullU8: {
3713 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3714 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003715 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003716 }
3717
sewardj5af05062010-10-18 16:31:14 +00003718 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003719 case Iop_DivS32:
3720 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003721 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003722 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003723 case Iop_QAdd32S: /* could probably do better */
3724 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003725 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3726
sewardjb00944a2005-12-23 12:47:16 +00003727 case Iop_DivS64:
3728 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003729 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003730 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003731 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3732
sewardj95448072004-11-22 20:19:51 +00003733 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003734 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003735 return expensiveAddSub(mce,True,Ity_I32,
3736 vatom1,vatom2, atom1,atom2);
3737 else
3738 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003739 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003740 if (mce->bogusLiterals)
3741 return expensiveAddSub(mce,False,Ity_I32,
3742 vatom1,vatom2, atom1,atom2);
3743 else
3744 goto cheap_AddSub32;
3745
3746 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003747 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003748 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3749
sewardj463b3d92005-07-18 11:41:15 +00003750 case Iop_CmpORD32S:
3751 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003752 case Iop_CmpORD64S:
3753 case Iop_CmpORD64U:
3754 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003755
sewardj681be302005-01-15 20:43:58 +00003756 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003757 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003758 return expensiveAddSub(mce,True,Ity_I64,
3759 vatom1,vatom2, atom1,atom2);
3760 else
3761 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003762 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003763 if (mce->bogusLiterals)
3764 return expensiveAddSub(mce,False,Ity_I64,
3765 vatom1,vatom2, atom1,atom2);
3766 else
3767 goto cheap_AddSub64;
3768
3769 cheap_AddSub64:
3770 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003771 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3772
sewardj95448072004-11-22 20:19:51 +00003773 case Iop_Mul16:
3774 case Iop_Add16:
3775 case Iop_Sub16:
3776 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3777
florian537ed2d2012-08-20 16:51:39 +00003778 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003779 case Iop_Sub8:
3780 case Iop_Add8:
3781 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3782
sewardj69a13322005-04-23 01:14:51 +00003783 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003784 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003785 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003786 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003787 else
3788 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003789
3790 expensive_cmp64:
3791 case Iop_ExpCmpNE64:
3792 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3793
sewardj69a13322005-04-23 01:14:51 +00003794 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003795 case Iop_CmpLE64S: case Iop_CmpLE64U:
3796 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003797 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3798
sewardjd5204dc2004-12-31 01:16:11 +00003799 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003800 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003801 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003802 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003803 else
3804 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003805
3806 expensive_cmp32:
3807 case Iop_ExpCmpNE32:
3808 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3809
sewardjd5204dc2004-12-31 01:16:11 +00003810 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003811 case Iop_CmpLE32S: case Iop_CmpLE32U:
3812 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003813 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3814
3815 case Iop_CmpEQ16: case Iop_CmpNE16:
3816 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3817
sewardj4cfa81b2012-11-08 10:58:16 +00003818 case Iop_ExpCmpNE16:
3819 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3820
sewardj95448072004-11-22 20:19:51 +00003821 case Iop_CmpEQ8: case Iop_CmpNE8:
3822 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3823
sewardjafed4c52009-07-12 13:00:17 +00003824 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3825 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3826 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3827 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3828 /* Just say these all produce a defined result, regardless
3829 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3830 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3831
sewardjaaddbc22005-10-07 09:49:53 +00003832 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3833 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3834
sewardj95448072004-11-22 20:19:51 +00003835 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003836 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003837
sewardjdb67f5f2004-12-14 01:15:31 +00003838 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003839 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003840
florian537ed2d2012-08-20 16:51:39 +00003841 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003842 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003843
sewardj350e8f72012-06-25 07:52:15 +00003844 case Iop_AndV256:
3845 uifu = mkUifUV256; difd = mkDifDV256;
3846 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003847 case Iop_AndV128:
3848 uifu = mkUifUV128; difd = mkDifDV128;
3849 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003850 case Iop_And64:
3851 uifu = mkUifU64; difd = mkDifD64;
3852 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003853 case Iop_And32:
3854 uifu = mkUifU32; difd = mkDifD32;
3855 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3856 case Iop_And16:
3857 uifu = mkUifU16; difd = mkDifD16;
3858 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3859 case Iop_And8:
3860 uifu = mkUifU8; difd = mkDifD8;
3861 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3862
sewardj350e8f72012-06-25 07:52:15 +00003863 case Iop_OrV256:
3864 uifu = mkUifUV256; difd = mkDifDV256;
3865 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003866 case Iop_OrV128:
3867 uifu = mkUifUV128; difd = mkDifDV128;
3868 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003869 case Iop_Or64:
3870 uifu = mkUifU64; difd = mkDifD64;
3871 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003872 case Iop_Or32:
3873 uifu = mkUifU32; difd = mkDifD32;
3874 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3875 case Iop_Or16:
3876 uifu = mkUifU16; difd = mkDifD16;
3877 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3878 case Iop_Or8:
3879 uifu = mkUifU8; difd = mkDifD8;
3880 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3881
3882 do_And_Or:
3883 return
3884 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003885 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003886 and_or_ty,
3887 difd(mce, uifu(mce, vatom1, vatom2),
3888 difd(mce, improve(mce, atom1, vatom1),
3889 improve(mce, atom2, vatom2) ) ) );
3890
3891 case Iop_Xor8:
3892 return mkUifU8(mce, vatom1, vatom2);
3893 case Iop_Xor16:
3894 return mkUifU16(mce, vatom1, vatom2);
3895 case Iop_Xor32:
3896 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003897 case Iop_Xor64:
3898 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003899 case Iop_XorV128:
3900 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003901 case Iop_XorV256:
3902 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003903
sewardja2f30952013-03-27 11:40:02 +00003904 /* V256-bit SIMD */
3905
3906 case Iop_ShrN16x16:
3907 case Iop_ShrN32x8:
3908 case Iop_ShrN64x4:
3909 case Iop_SarN16x16:
3910 case Iop_SarN32x8:
3911 case Iop_ShlN16x16:
3912 case Iop_ShlN32x8:
3913 case Iop_ShlN64x4:
3914 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3915 this is wrong now, scalar shifts are done properly lazily.
3916 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003917 complainIfUndefined(mce, atom2, NULL);
sewardja2f30952013-03-27 11:40:02 +00003918 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
3919
3920 case Iop_QSub8Ux32:
3921 case Iop_QSub8Sx32:
3922 case Iop_Sub8x32:
3923 case Iop_Min8Ux32:
3924 case Iop_Min8Sx32:
3925 case Iop_Max8Ux32:
3926 case Iop_Max8Sx32:
3927 case Iop_CmpGT8Sx32:
3928 case Iop_CmpEQ8x32:
3929 case Iop_Avg8Ux32:
3930 case Iop_QAdd8Ux32:
3931 case Iop_QAdd8Sx32:
3932 case Iop_Add8x32:
3933 return binary8Ix32(mce, vatom1, vatom2);
3934
3935 case Iop_QSub16Ux16:
3936 case Iop_QSub16Sx16:
3937 case Iop_Sub16x16:
3938 case Iop_Mul16x16:
3939 case Iop_MulHi16Sx16:
3940 case Iop_MulHi16Ux16:
3941 case Iop_Min16Sx16:
3942 case Iop_Min16Ux16:
3943 case Iop_Max16Sx16:
3944 case Iop_Max16Ux16:
3945 case Iop_CmpGT16Sx16:
3946 case Iop_CmpEQ16x16:
3947 case Iop_Avg16Ux16:
3948 case Iop_QAdd16Ux16:
3949 case Iop_QAdd16Sx16:
3950 case Iop_Add16x16:
3951 return binary16Ix16(mce, vatom1, vatom2);
3952
3953 case Iop_Sub32x8:
3954 case Iop_CmpGT32Sx8:
3955 case Iop_CmpEQ32x8:
3956 case Iop_Add32x8:
3957 case Iop_Max32Ux8:
3958 case Iop_Max32Sx8:
3959 case Iop_Min32Ux8:
3960 case Iop_Min32Sx8:
3961 case Iop_Mul32x8:
3962 return binary32Ix8(mce, vatom1, vatom2);
3963
3964 case Iop_Sub64x4:
3965 case Iop_Add64x4:
3966 case Iop_CmpEQ64x4:
3967 case Iop_CmpGT64Sx4:
3968 return binary64Ix4(mce, vatom1, vatom2);
3969
3970 /* Perm32x8: rearrange values in left arg using steering values
3971 from right arg. So rearrange the vbits in the same way but
3972 pessimise wrt steering values. */
3973 case Iop_Perm32x8:
3974 return mkUifUV256(
3975 mce,
3976 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
3977 mkPCast32x8(mce, vatom2)
3978 );
3979
njn25e49d8e72002-09-23 09:36:25 +00003980 default:
sewardj95448072004-11-22 20:19:51 +00003981 ppIROp(op);
3982 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003983 }
njn25e49d8e72002-09-23 09:36:25 +00003984}
3985
njn25e49d8e72002-09-23 09:36:25 +00003986
sewardj95448072004-11-22 20:19:51 +00003987static
3988IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3989{
sewardjcafe5052013-01-17 14:24:35 +00003990 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
3991 selection of shadow operation implicitly duplicates the logic in
3992 do_shadow_LoadG and should be kept in sync (in the very unlikely
3993 event that the interpretation of such widening ops changes in
3994 future). See comment in do_shadow_LoadG. */
sewardj95448072004-11-22 20:19:51 +00003995 IRAtom* vatom = expr2vbits( mce, atom );
3996 tl_assert(isOriginalAtom(mce,atom));
3997 switch (op) {
3998
sewardj0b070592004-12-10 21:44:22 +00003999 case Iop_Sqrt64Fx2:
sewardjc46e6cc2014-03-10 10:42:36 +00004000 case Iop_Abs64Fx2:
4001 case Iop_Neg64Fx2:
sewardj0b070592004-12-10 21:44:22 +00004002 return unary64Fx2(mce, vatom);
4003
4004 case Iop_Sqrt64F0x2:
4005 return unary64F0x2(mce, vatom);
4006
sewardj350e8f72012-06-25 07:52:15 +00004007 case Iop_Sqrt32Fx8:
4008 case Iop_RSqrt32Fx8:
4009 case Iop_Recip32Fx8:
4010 return unary32Fx8(mce, vatom);
4011
4012 case Iop_Sqrt64Fx4:
4013 return unary64Fx4(mce, vatom);
4014
sewardj170ee212004-12-10 18:57:51 +00004015 case Iop_Sqrt32Fx4:
4016 case Iop_RSqrt32Fx4:
4017 case Iop_Recip32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00004018 case Iop_I32UtoFx4:
4019 case Iop_I32StoFx4:
4020 case Iop_QFtoI32Ux4_RZ:
4021 case Iop_QFtoI32Sx4_RZ:
4022 case Iop_RoundF32x4_RM:
4023 case Iop_RoundF32x4_RP:
4024 case Iop_RoundF32x4_RN:
4025 case Iop_RoundF32x4_RZ:
sewardj57f92b02010-08-22 11:54:14 +00004026 case Iop_Recip32x4:
4027 case Iop_Abs32Fx4:
4028 case Iop_Neg32Fx4:
4029 case Iop_Rsqrte32Fx4:
sewardj170ee212004-12-10 18:57:51 +00004030 return unary32Fx4(mce, vatom);
4031
sewardj57f92b02010-08-22 11:54:14 +00004032 case Iop_I32UtoFx2:
4033 case Iop_I32StoFx2:
4034 case Iop_Recip32Fx2:
4035 case Iop_Recip32x2:
4036 case Iop_Abs32Fx2:
4037 case Iop_Neg32Fx2:
4038 case Iop_Rsqrte32Fx2:
4039 return unary32Fx2(mce, vatom);
4040
sewardj170ee212004-12-10 18:57:51 +00004041 case Iop_Sqrt32F0x4:
4042 case Iop_RSqrt32F0x4:
4043 case Iop_Recip32F0x4:
4044 return unary32F0x4(mce, vatom);
4045
sewardj20d38f22005-02-07 23:50:18 +00004046 case Iop_32UtoV128:
4047 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00004048 case Iop_Dup8x16:
4049 case Iop_Dup16x8:
4050 case Iop_Dup32x4:
sewardj57f92b02010-08-22 11:54:14 +00004051 case Iop_Reverse16_8x16:
4052 case Iop_Reverse32_8x16:
4053 case Iop_Reverse32_16x8:
4054 case Iop_Reverse64_8x16:
4055 case Iop_Reverse64_16x8:
4056 case Iop_Reverse64_32x4:
sewardj350e8f72012-06-25 07:52:15 +00004057 case Iop_V256toV128_1: case Iop_V256toV128_0:
sewardjc46e6cc2014-03-10 10:42:36 +00004058 case Iop_ZeroHI64ofV128:
4059 case Iop_ZeroHI96ofV128:
4060 case Iop_ZeroHI112ofV128:
4061 case Iop_ZeroHI120ofV128:
sewardj7cf4e6b2008-05-01 20:24:26 +00004062 return assignNew('V', mce, Ity_V128, unop(op, vatom));
sewardj170ee212004-12-10 18:57:51 +00004063
sewardjb5b87402011-03-07 16:05:35 +00004064 case Iop_F128HItoF64: /* F128 -> high half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00004065 case Iop_D128HItoD64: /* D128 -> high half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00004066 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4067 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00004068 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00004069 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4070
4071 case Iop_NegF128:
4072 case Iop_AbsF128:
4073 return mkPCastTo(mce, Ity_I128, vatom);
4074
4075 case Iop_I32StoF128: /* signed I32 -> F128 */
4076 case Iop_I64StoF128: /* signed I64 -> F128 */
florian1b9609a2012-09-01 00:15:45 +00004077 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4078 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
sewardjb5b87402011-03-07 16:05:35 +00004079 case Iop_F32toF128: /* F32 -> F128 */
4080 case Iop_F64toF128: /* F64 -> F128 */
florian53eb2a02013-01-12 22:04:00 +00004081 case Iop_I32StoD128: /* signed I64 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00004082 case Iop_I64StoD128: /* signed I64 -> D128 */
florian53eb2a02013-01-12 22:04:00 +00004083 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4084 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
sewardjb5b87402011-03-07 16:05:35 +00004085 return mkPCastTo(mce, Ity_I128, vatom);
4086
sewardj95448072004-11-22 20:19:51 +00004087 case Iop_F32toF64:
sewardj06f96d02009-12-31 19:24:12 +00004088 case Iop_I32StoF64:
sewardj59570ff2010-01-01 11:59:33 +00004089 case Iop_I32UtoF64:
sewardj95448072004-11-22 20:19:51 +00004090 case Iop_NegF64:
sewardj95448072004-11-22 20:19:51 +00004091 case Iop_AbsF64:
sewardj4f3e0f22006-01-27 15:11:31 +00004092 case Iop_Est5FRSqrt:
sewardjdead90a2008-08-08 08:38:23 +00004093 case Iop_RoundF64toF64_NEAREST:
4094 case Iop_RoundF64toF64_NegINF:
4095 case Iop_RoundF64toF64_PosINF:
4096 case Iop_RoundF64toF64_ZERO:
sewardj39cc7352005-06-09 21:31:55 +00004097 case Iop_Clz64:
sewardjea8b02f2012-04-12 17:28:57 +00004098 case Iop_D32toD64:
florian53eb2a02013-01-12 22:04:00 +00004099 case Iop_I32StoD64:
4100 case Iop_I32UtoD64:
sewardj18c72fa2012-04-23 11:22:05 +00004101 case Iop_ExtractExpD64: /* D64 -> I64 */
4102 case Iop_ExtractExpD128: /* D128 -> I64 */
florian974b4092012-12-27 20:06:18 +00004103 case Iop_ExtractSigD64: /* D64 -> I64 */
4104 case Iop_ExtractSigD128: /* D128 -> I64 */
florian1943eb52012-08-22 18:09:07 +00004105 case Iop_DPBtoBCD:
4106 case Iop_BCDtoDPB:
sewardj95448072004-11-22 20:19:51 +00004107 return mkPCastTo(mce, Ity_I64, vatom);
4108
sewardjea8b02f2012-04-12 17:28:57 +00004109 case Iop_D64toD128:
4110 return mkPCastTo(mce, Ity_I128, vatom);
4111
sewardj95448072004-11-22 20:19:51 +00004112 case Iop_Clz32:
sewardjed69fdb2006-02-03 16:12:27 +00004113 case Iop_TruncF64asF32:
sewardj59570ff2010-01-01 11:59:33 +00004114 case Iop_NegF32:
4115 case Iop_AbsF32:
sewardj95448072004-11-22 20:19:51 +00004116 return mkPCastTo(mce, Ity_I32, vatom);
4117
sewardj4cfa81b2012-11-08 10:58:16 +00004118 case Iop_Ctz32:
4119 case Iop_Ctz64:
4120 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4121
sewardjd9dbc192005-04-27 11:40:27 +00004122 case Iop_1Uto64:
sewardja201c452011-07-24 14:15:54 +00004123 case Iop_1Sto64:
sewardjd9dbc192005-04-27 11:40:27 +00004124 case Iop_8Uto64:
4125 case Iop_8Sto64:
4126 case Iop_16Uto64:
4127 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00004128 case Iop_32Sto64:
4129 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00004130 case Iop_V128to64:
4131 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00004132 case Iop_128HIto64:
4133 case Iop_128to64:
sewardj57f92b02010-08-22 11:54:14 +00004134 case Iop_Dup8x8:
4135 case Iop_Dup16x4:
4136 case Iop_Dup32x2:
4137 case Iop_Reverse16_8x8:
4138 case Iop_Reverse32_8x8:
4139 case Iop_Reverse32_16x4:
4140 case Iop_Reverse64_8x8:
4141 case Iop_Reverse64_16x4:
4142 case Iop_Reverse64_32x2:
sewardj350e8f72012-06-25 07:52:15 +00004143 case Iop_V256to64_0: case Iop_V256to64_1:
4144 case Iop_V256to64_2: case Iop_V256to64_3:
sewardj7cf4e6b2008-05-01 20:24:26 +00004145 return assignNew('V', mce, Ity_I64, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004146
4147 case Iop_64to32:
4148 case Iop_64HIto32:
4149 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00004150 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00004151 case Iop_8Uto32:
4152 case Iop_16Uto32:
4153 case Iop_16Sto32:
4154 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00004155 case Iop_V128to32:
sewardj7cf4e6b2008-05-01 20:24:26 +00004156 return assignNew('V', mce, Ity_I32, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004157
4158 case Iop_8Sto16:
4159 case Iop_8Uto16:
4160 case Iop_32to16:
4161 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00004162 case Iop_64to16:
sewardjf5176342012-12-13 18:31:49 +00004163 case Iop_GetMSBs8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00004164 return assignNew('V', mce, Ity_I16, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004165
4166 case Iop_1Uto8:
sewardja201c452011-07-24 14:15:54 +00004167 case Iop_1Sto8:
sewardj95448072004-11-22 20:19:51 +00004168 case Iop_16to8:
sewardj9a807e02006-12-17 14:20:31 +00004169 case Iop_16HIto8:
sewardj95448072004-11-22 20:19:51 +00004170 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00004171 case Iop_64to8:
sewardj4cfa81b2012-11-08 10:58:16 +00004172 case Iop_GetMSBs8x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00004173 return assignNew('V', mce, Ity_I8, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004174
4175 case Iop_32to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00004176 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
sewardj95448072004-11-22 20:19:51 +00004177
sewardjd9dbc192005-04-27 11:40:27 +00004178 case Iop_64to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00004179 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
sewardjd9dbc192005-04-27 11:40:27 +00004180
sewardj95448072004-11-22 20:19:51 +00004181 case Iop_ReinterpF64asI64:
4182 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00004183 case Iop_ReinterpI32asF32:
sewardj59570ff2010-01-01 11:59:33 +00004184 case Iop_ReinterpF32asI32:
sewardj18c72fa2012-04-23 11:22:05 +00004185 case Iop_ReinterpI64asD64:
sewardj0892b822012-04-29 20:20:16 +00004186 case Iop_ReinterpD64asI64:
sewardj350e8f72012-06-25 07:52:15 +00004187 case Iop_NotV256:
sewardj20d38f22005-02-07 23:50:18 +00004188 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00004189 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00004190 case Iop_Not32:
4191 case Iop_Not16:
4192 case Iop_Not8:
4193 case Iop_Not1:
4194 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00004195
sewardj57f92b02010-08-22 11:54:14 +00004196 case Iop_CmpNEZ8x8:
4197 case Iop_Cnt8x8:
4198 case Iop_Clz8Sx8:
4199 case Iop_Cls8Sx8:
4200 case Iop_Abs8x8:
4201 return mkPCast8x8(mce, vatom);
4202
4203 case Iop_CmpNEZ8x16:
4204 case Iop_Cnt8x16:
4205 case Iop_Clz8Sx16:
4206 case Iop_Cls8Sx16:
4207 case Iop_Abs8x16:
4208 return mkPCast8x16(mce, vatom);
4209
4210 case Iop_CmpNEZ16x4:
4211 case Iop_Clz16Sx4:
4212 case Iop_Cls16Sx4:
4213 case Iop_Abs16x4:
4214 return mkPCast16x4(mce, vatom);
4215
4216 case Iop_CmpNEZ16x8:
4217 case Iop_Clz16Sx8:
4218 case Iop_Cls16Sx8:
4219 case Iop_Abs16x8:
4220 return mkPCast16x8(mce, vatom);
4221
4222 case Iop_CmpNEZ32x2:
4223 case Iop_Clz32Sx2:
4224 case Iop_Cls32Sx2:
4225 case Iop_FtoI32Ux2_RZ:
4226 case Iop_FtoI32Sx2_RZ:
4227 case Iop_Abs32x2:
4228 return mkPCast32x2(mce, vatom);
4229
4230 case Iop_CmpNEZ32x4:
4231 case Iop_Clz32Sx4:
4232 case Iop_Cls32Sx4:
4233 case Iop_FtoI32Ux4_RZ:
4234 case Iop_FtoI32Sx4_RZ:
4235 case Iop_Abs32x4:
4236 return mkPCast32x4(mce, vatom);
4237
florian537ed2d2012-08-20 16:51:39 +00004238 case Iop_CmpwNEZ32:
4239 return mkPCastTo(mce, Ity_I32, vatom);
4240
sewardj57f92b02010-08-22 11:54:14 +00004241 case Iop_CmpwNEZ64:
4242 return mkPCastTo(mce, Ity_I64, vatom);
4243
4244 case Iop_CmpNEZ64x2:
carll24e40de2013-10-15 18:13:21 +00004245 case Iop_CipherSV128:
4246 case Iop_Clz64x2:
sewardj57f92b02010-08-22 11:54:14 +00004247 return mkPCast64x2(mce, vatom);
4248
carlle6bd3e42013-10-18 01:20:11 +00004249 case Iop_PwBitMtxXpose64x2:
4250 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4251
sewardj7ee7d852011-06-16 11:37:21 +00004252 case Iop_NarrowUn16to8x8:
4253 case Iop_NarrowUn32to16x4:
4254 case Iop_NarrowUn64to32x2:
4255 case Iop_QNarrowUn16Sto8Sx8:
4256 case Iop_QNarrowUn16Sto8Ux8:
4257 case Iop_QNarrowUn16Uto8Ux8:
4258 case Iop_QNarrowUn32Sto16Sx4:
4259 case Iop_QNarrowUn32Sto16Ux4:
4260 case Iop_QNarrowUn32Uto16Ux4:
4261 case Iop_QNarrowUn64Sto32Sx2:
4262 case Iop_QNarrowUn64Sto32Ux2:
4263 case Iop_QNarrowUn64Uto32Ux2:
4264 return vectorNarrowUnV128(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00004265
sewardj7ee7d852011-06-16 11:37:21 +00004266 case Iop_Widen8Sto16x8:
4267 case Iop_Widen8Uto16x8:
4268 case Iop_Widen16Sto32x4:
4269 case Iop_Widen16Uto32x4:
4270 case Iop_Widen32Sto64x2:
4271 case Iop_Widen32Uto64x2:
4272 return vectorWidenI64(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00004273
4274 case Iop_PwAddL32Ux2:
4275 case Iop_PwAddL32Sx2:
4276 return mkPCastTo(mce, Ity_I64,
4277 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4278
4279 case Iop_PwAddL16Ux4:
4280 case Iop_PwAddL16Sx4:
4281 return mkPCast32x2(mce,
4282 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4283
4284 case Iop_PwAddL8Ux8:
4285 case Iop_PwAddL8Sx8:
4286 return mkPCast16x4(mce,
4287 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4288
4289 case Iop_PwAddL32Ux4:
4290 case Iop_PwAddL32Sx4:
4291 return mkPCast64x2(mce,
4292 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4293
4294 case Iop_PwAddL16Ux8:
4295 case Iop_PwAddL16Sx8:
4296 return mkPCast32x4(mce,
4297 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4298
4299 case Iop_PwAddL8Ux16:
4300 case Iop_PwAddL8Sx16:
4301 return mkPCast16x8(mce,
4302 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4303
sewardjf34eb492011-04-15 11:57:05 +00004304 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00004305 default:
4306 ppIROp(op);
4307 VG_(tool_panic)("memcheck:expr2vbits_Unop");
4308 }
4309}
4310
4311
sewardjb9e6d242013-05-11 13:42:08 +00004312/* Worker function -- do not call directly. See comments on
4313 expr2vbits_Load for the meaning of |guard|.
4314
4315 Generates IR to (1) perform a definedness test of |addr|, (2)
4316 perform a validity test of |addr|, and (3) return the Vbits for the
4317 location indicated by |addr|. All of this only happens when
4318 |guard| is NULL or |guard| evaluates to True at run time.
4319
4320 If |guard| evaluates to False at run time, the returned value is
4321 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4322 performed.
4323
4324 The definedness of |guard| itself is not checked. That is assumed
4325 to have been done before this point, by the caller. */
sewardj95448072004-11-22 20:19:51 +00004326static
sewardj67564542013-08-16 08:31:29 +00004327IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
4328 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004329 IRAtom* addr, UInt bias, IRAtom* guard )
sewardj95448072004-11-22 20:19:51 +00004330{
sewardj95448072004-11-22 20:19:51 +00004331 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00004332 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00004333
4334 /* First, emit a definedness test for the address. This also sets
4335 the address (shadow) to 'defined' following the test. */
sewardjb9e6d242013-05-11 13:42:08 +00004336 complainIfUndefined( mce, addr, guard );
sewardj95448072004-11-22 20:19:51 +00004337
4338 /* Now cook up a call to the relevant helper function, to read the
4339 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00004340 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00004341
sewardj21a5f8c2013-08-08 10:41:46 +00004342 void* helper = NULL;
4343 const HChar* hname = NULL;
4344 Bool ret_via_outparam = False;
4345
sewardj67564542013-08-16 08:31:29 +00004346 if (end == Iend_LE) {
sewardj2e595852005-06-30 23:33:37 +00004347 switch (ty) {
sewardj67564542013-08-16 08:31:29 +00004348 case Ity_V256: helper = &MC_(helperc_LOADV256le);
4349 hname = "MC_(helperc_LOADV256le)";
4350 ret_via_outparam = True;
4351 break;
sewardj21a5f8c2013-08-08 10:41:46 +00004352 case Ity_V128: helper = &MC_(helperc_LOADV128le);
4353 hname = "MC_(helperc_LOADV128le)";
4354 ret_via_outparam = True;
4355 break;
4356 case Ity_I64: helper = &MC_(helperc_LOADV64le);
4357 hname = "MC_(helperc_LOADV64le)";
4358 break;
4359 case Ity_I32: helper = &MC_(helperc_LOADV32le);
4360 hname = "MC_(helperc_LOADV32le)";
4361 break;
4362 case Ity_I16: helper = &MC_(helperc_LOADV16le);
4363 hname = "MC_(helperc_LOADV16le)";
4364 break;
4365 case Ity_I8: helper = &MC_(helperc_LOADV8);
4366 hname = "MC_(helperc_LOADV8)";
4367 break;
4368 default: ppIRType(ty);
4369 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
sewardj2e595852005-06-30 23:33:37 +00004370 }
4371 } else {
sewardj8cf88b72005-07-08 01:29:33 +00004372 switch (ty) {
sewardj67564542013-08-16 08:31:29 +00004373 case Ity_V256: helper = &MC_(helperc_LOADV256be);
4374 hname = "MC_(helperc_LOADV256be)";
4375 ret_via_outparam = True;
4376 break;
sewardj21a5f8c2013-08-08 10:41:46 +00004377 case Ity_V128: helper = &MC_(helperc_LOADV128be);
4378 hname = "MC_(helperc_LOADV128be)";
4379 ret_via_outparam = True;
4380 break;
4381 case Ity_I64: helper = &MC_(helperc_LOADV64be);
4382 hname = "MC_(helperc_LOADV64be)";
4383 break;
4384 case Ity_I32: helper = &MC_(helperc_LOADV32be);
4385 hname = "MC_(helperc_LOADV32be)";
4386 break;
4387 case Ity_I16: helper = &MC_(helperc_LOADV16be);
4388 hname = "MC_(helperc_LOADV16be)";
4389 break;
4390 case Ity_I8: helper = &MC_(helperc_LOADV8);
4391 hname = "MC_(helperc_LOADV8)";
4392 break;
4393 default: ppIRType(ty);
4394 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
sewardj8cf88b72005-07-08 01:29:33 +00004395 }
sewardj95448072004-11-22 20:19:51 +00004396 }
4397
sewardj21a5f8c2013-08-08 10:41:46 +00004398 tl_assert(helper);
4399 tl_assert(hname);
4400
sewardj95448072004-11-22 20:19:51 +00004401 /* Generate the actual address into addrAct. */
sewardj21a5f8c2013-08-08 10:41:46 +00004402 IRAtom* addrAct;
sewardj95448072004-11-22 20:19:51 +00004403 if (bias == 0) {
4404 addrAct = addr;
4405 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00004406 IROp mkAdd;
4407 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00004408 IRType tyAddr = mce->hWordTy;
4409 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00004410 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4411 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00004412 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00004413 }
4414
4415 /* We need to have a place to park the V bits we're just about to
4416 read. */
sewardj21a5f8c2013-08-08 10:41:46 +00004417 IRTemp datavbits = newTemp(mce, ty, VSh);
4418
4419 /* Here's the call. */
4420 IRDirty* di;
4421 if (ret_via_outparam) {
4422 di = unsafeIRDirty_1_N( datavbits,
4423 2/*regparms*/,
4424 hname, VG_(fnptr_to_fnentry)( helper ),
floriana5c3ecb2013-08-15 20:55:42 +00004425 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
sewardj21a5f8c2013-08-08 10:41:46 +00004426 } else {
4427 di = unsafeIRDirty_1_N( datavbits,
4428 1/*regparms*/,
4429 hname, VG_(fnptr_to_fnentry)( helper ),
4430 mkIRExprVec_1( addrAct ) );
4431 }
4432
sewardj95448072004-11-22 20:19:51 +00004433 setHelperAnns( mce, di );
sewardjcafe5052013-01-17 14:24:35 +00004434 if (guard) {
4435 di->guard = guard;
4436 /* Ideally the didn't-happen return value here would be all-ones
4437 (all-undefined), so it'd be obvious if it got used
4438 inadvertantly. We can get by with the IR-mandated default
4439 value (0b01 repeating, 0x55 etc) as that'll still look pretty
4440 undefined if it ever leaks out. */
4441 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004442 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00004443
4444 return mkexpr(datavbits);
4445}
4446
4447
sewardjcafe5052013-01-17 14:24:35 +00004448/* Generate IR to do a shadow load. The helper is expected to check
4449 the validity of the address and return the V bits for that address.
4450 This can optionally be controlled by a guard, which is assumed to
4451 be True if NULL. In the case where the guard is False at runtime,
sewardjb9e6d242013-05-11 13:42:08 +00004452 the helper will return the didn't-do-the-call value of 0x55..55.
4453 Since that means "completely undefined result", the caller of
sewardjcafe5052013-01-17 14:24:35 +00004454 this function will need to fix up the result somehow in that
4455 case.
sewardjb9e6d242013-05-11 13:42:08 +00004456
4457 Caller of this function is also expected to have checked the
4458 definedness of |guard| before this point.
sewardjcafe5052013-01-17 14:24:35 +00004459*/
sewardj95448072004-11-22 20:19:51 +00004460static
sewardj67564542013-08-16 08:31:29 +00004461IRAtom* expr2vbits_Load ( MCEnv* mce,
4462 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004463 IRAtom* addr, UInt bias,
4464 IRAtom* guard )
sewardj170ee212004-12-10 18:57:51 +00004465{
sewardj2e595852005-06-30 23:33:37 +00004466 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00004467 switch (shadowTypeV(ty)) {
sewardj67564542013-08-16 08:31:29 +00004468 case Ity_I8:
4469 case Ity_I16:
4470 case Ity_I32:
sewardj170ee212004-12-10 18:57:51 +00004471 case Ity_I64:
sewardj21a5f8c2013-08-08 10:41:46 +00004472 case Ity_V128:
sewardj67564542013-08-16 08:31:29 +00004473 case Ity_V256:
sewardjcafe5052013-01-17 14:24:35 +00004474 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
sewardj170ee212004-12-10 18:57:51 +00004475 default:
sewardj2e595852005-06-30 23:33:37 +00004476 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00004477 }
4478}
4479
4480
sewardjcafe5052013-01-17 14:24:35 +00004481/* The most general handler for guarded loads. Assumes the
sewardjb9e6d242013-05-11 13:42:08 +00004482 definedness of GUARD has already been checked by the caller. A
4483 GUARD of NULL is assumed to mean "always True". Generates code to
4484 check the definedness and validity of ADDR.
sewardjcafe5052013-01-17 14:24:35 +00004485
4486 Generate IR to do a shadow load from ADDR and return the V bits.
4487 The loaded type is TY. The loaded data is then (shadow) widened by
4488 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4489 evaluates to False at run time then the returned Vbits are simply
4490 VALT instead. Note therefore that the argument type of VWIDEN must
4491 be TY and the result type of VWIDEN must equal the type of VALT.
4492*/
florian434ffae2012-07-19 17:23:42 +00004493static
sewardjcafe5052013-01-17 14:24:35 +00004494IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4495 IREndness end, IRType ty,
4496 IRAtom* addr, UInt bias,
4497 IRAtom* guard,
4498 IROp vwiden, IRAtom* valt )
florian434ffae2012-07-19 17:23:42 +00004499{
sewardjcafe5052013-01-17 14:24:35 +00004500 /* Sanity check the conversion operation, and also set TYWIDE. */
4501 IRType tyWide = Ity_INVALID;
4502 switch (vwiden) {
4503 case Iop_INVALID:
4504 tyWide = ty;
4505 break;
4506 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4507 tyWide = Ity_I32;
4508 break;
4509 default:
4510 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
florian434ffae2012-07-19 17:23:42 +00004511 }
4512
sewardjcafe5052013-01-17 14:24:35 +00004513 /* If the guard evaluates to True, this will hold the loaded V bits
4514 at TY. If the guard evaluates to False, this will be all
4515 ones, meaning "all undefined", in which case we will have to
florian5686b2d2013-01-29 03:57:40 +00004516 replace it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00004517 IRAtom* iftrue1
4518 = assignNew('V', mce, ty,
4519 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4520 /* Now (shadow-) widen the loaded V bits to the desired width. In
4521 the guard-is-False case, the allowable widening operators will
4522 in the worst case (unsigned widening) at least leave the
4523 pre-widened part as being marked all-undefined, and in the best
4524 case (signed widening) mark the whole widened result as
4525 undefined. Anyway, it doesn't matter really, since in this case
florian5686b2d2013-01-29 03:57:40 +00004526 we will replace said value with the default value |valt| using an
4527 ITE. */
sewardjcafe5052013-01-17 14:24:35 +00004528 IRAtom* iftrue2
4529 = vwiden == Iop_INVALID
4530 ? iftrue1
4531 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4532 /* These are the V bits we will return if the load doesn't take
4533 place. */
4534 IRAtom* iffalse
4535 = valt;
florian5686b2d2013-01-29 03:57:40 +00004536 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00004537 something that iropt knows how to fold out later. */
4538 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00004539 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00004540 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00004541 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00004542}
4543
4544
4545/* A simpler handler for guarded loads, in which there is no
4546 conversion operation, and the default V bit return (when the guard
4547 evaluates to False at runtime) is "all defined". If there is no
4548 guard expression or the guard is always TRUE this function behaves
sewardjb9e6d242013-05-11 13:42:08 +00004549 like expr2vbits_Load. It is assumed that definedness of GUARD has
4550 already been checked at the call site. */
sewardjcafe5052013-01-17 14:24:35 +00004551static
4552IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4553 IREndness end, IRType ty,
4554 IRAtom* addr, UInt bias,
4555 IRAtom *guard )
4556{
4557 return expr2vbits_Load_guarded_General(
4558 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4559 );
florian434ffae2012-07-19 17:23:42 +00004560}
4561
4562
sewardj170ee212004-12-10 18:57:51 +00004563static
florian5686b2d2013-01-29 03:57:40 +00004564IRAtom* expr2vbits_ITE ( MCEnv* mce,
4565 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
sewardj95448072004-11-22 20:19:51 +00004566{
florian5686b2d2013-01-29 03:57:40 +00004567 IRAtom *vbitsC, *vbits0, *vbits1;
sewardj95448072004-11-22 20:19:51 +00004568 IRType ty;
sewardj07bfda22013-01-29 21:11:55 +00004569 /* Given ITE(cond, iftrue, iffalse), generate
4570 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
sewardj95448072004-11-22 20:19:51 +00004571 That is, steer the V bits like the originals, but trash the
4572 result if the steering value is undefined. This gives
4573 lazy propagation. */
4574 tl_assert(isOriginalAtom(mce, cond));
florian5686b2d2013-01-29 03:57:40 +00004575 tl_assert(isOriginalAtom(mce, iftrue));
4576 tl_assert(isOriginalAtom(mce, iffalse));
sewardj95448072004-11-22 20:19:51 +00004577
4578 vbitsC = expr2vbits(mce, cond);
florian5686b2d2013-01-29 03:57:40 +00004579 vbits1 = expr2vbits(mce, iftrue);
sewardj07bfda22013-01-29 21:11:55 +00004580 vbits0 = expr2vbits(mce, iffalse);
sewardj1c0ce7a2009-07-01 08:10:49 +00004581 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004582
4583 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004584 mkUifU(mce, ty, assignNew('V', mce, ty,
florian5686b2d2013-01-29 03:57:40 +00004585 IRExpr_ITE(cond, vbits1, vbits0)),
sewardj95448072004-11-22 20:19:51 +00004586 mkPCastTo(mce, ty, vbitsC) );
4587}
4588
4589/* --------- This is the main expression-handling function. --------- */
4590
4591static
4592IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4593{
4594 switch (e->tag) {
4595
4596 case Iex_Get:
4597 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4598
4599 case Iex_GetI:
4600 return shadow_GETI( mce, e->Iex.GetI.descr,
4601 e->Iex.GetI.ix, e->Iex.GetI.bias );
4602
sewardj0b9d74a2006-12-24 02:24:11 +00004603 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004604 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004605
4606 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004607 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004608
sewardje91cea72006-02-08 19:32:02 +00004609 case Iex_Qop:
4610 return expr2vbits_Qop(
4611 mce,
floriane2ab2972012-06-01 20:43:03 +00004612 e->Iex.Qop.details->op,
4613 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4614 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004615 );
4616
sewardjed69fdb2006-02-03 16:12:27 +00004617 case Iex_Triop:
4618 return expr2vbits_Triop(
4619 mce,
florian26441742012-06-02 20:30:41 +00004620 e->Iex.Triop.details->op,
4621 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4622 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004623 );
4624
sewardj95448072004-11-22 20:19:51 +00004625 case Iex_Binop:
4626 return expr2vbits_Binop(
4627 mce,
4628 e->Iex.Binop.op,
4629 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4630 );
4631
4632 case Iex_Unop:
4633 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4634
sewardj2e595852005-06-30 23:33:37 +00004635 case Iex_Load:
4636 return expr2vbits_Load( mce, e->Iex.Load.end,
4637 e->Iex.Load.ty,
sewardjcafe5052013-01-17 14:24:35 +00004638 e->Iex.Load.addr, 0/*addr bias*/,
4639 NULL/* guard == "always True"*/ );
sewardj95448072004-11-22 20:19:51 +00004640
4641 case Iex_CCall:
4642 return mkLazyN( mce, e->Iex.CCall.args,
4643 e->Iex.CCall.retty,
4644 e->Iex.CCall.cee );
4645
florian5686b2d2013-01-29 03:57:40 +00004646 case Iex_ITE:
4647 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
sewardj07bfda22013-01-29 21:11:55 +00004648 e->Iex.ITE.iffalse);
njn25e49d8e72002-09-23 09:36:25 +00004649
4650 default:
sewardj95448072004-11-22 20:19:51 +00004651 VG_(printf)("\n");
4652 ppIRExpr(e);
4653 VG_(printf)("\n");
4654 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004655 }
njn25e49d8e72002-09-23 09:36:25 +00004656}
4657
4658/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004659/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004660/*------------------------------------------------------------*/
4661
sewardj95448072004-11-22 20:19:51 +00004662/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004663
4664static
sewardj95448072004-11-22 20:19:51 +00004665IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004666{
sewardj7cf97ee2004-11-28 14:25:01 +00004667 IRType ty, tyH;
4668
sewardj95448072004-11-22 20:19:51 +00004669 /* vatom is vbits-value and as such can only have a shadow type. */
4670 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004671
sewardj1c0ce7a2009-07-01 08:10:49 +00004672 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004673 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004674
sewardj95448072004-11-22 20:19:51 +00004675 if (tyH == Ity_I32) {
4676 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004677 case Ity_I32:
4678 return vatom;
4679 case Ity_I16:
4680 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4681 case Ity_I8:
4682 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4683 default:
4684 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004685 }
sewardj6cf40ff2005-04-20 22:31:26 +00004686 } else
4687 if (tyH == Ity_I64) {
4688 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004689 case Ity_I32:
4690 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4691 case Ity_I16:
4692 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4693 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4694 case Ity_I8:
4695 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4696 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4697 default:
4698 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004699 }
sewardj95448072004-11-22 20:19:51 +00004700 } else {
4701 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004702 }
sewardj95448072004-11-22 20:19:51 +00004703 unhandled:
4704 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4705 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004706}
4707
njn25e49d8e72002-09-23 09:36:25 +00004708
sewardjcafe5052013-01-17 14:24:35 +00004709/* Generate a shadow store. |addr| is always the original address
4710 atom. You can pass in either originals or V-bits for the data
4711 atom, but obviously not both. This function generates a check for
sewardjb9e6d242013-05-11 13:42:08 +00004712 the definedness and (indirectly) the validity of |addr|, but only
4713 when |guard| evaluates to True at run time (or is NULL).
njn25e49d8e72002-09-23 09:36:25 +00004714
sewardjcafe5052013-01-17 14:24:35 +00004715 |guard| :: Ity_I1 controls whether the store really happens; NULL
4716 means it unconditionally does. Note that |guard| itself is not
4717 checked for definedness; the caller of this function must do that
4718 if necessary.
4719*/
sewardj95448072004-11-22 20:19:51 +00004720static
sewardj2e595852005-06-30 23:33:37 +00004721void do_shadow_Store ( MCEnv* mce,
4722 IREndness end,
4723 IRAtom* addr, UInt bias,
sewardj1c0ce7a2009-07-01 08:10:49 +00004724 IRAtom* data, IRAtom* vdata,
4725 IRAtom* guard )
njn25e49d8e72002-09-23 09:36:25 +00004726{
sewardj170ee212004-12-10 18:57:51 +00004727 IROp mkAdd;
4728 IRType ty, tyAddr;
sewardj95448072004-11-22 20:19:51 +00004729 void* helper = NULL;
floriana5f894c2012-10-21 03:43:20 +00004730 const HChar* hname = NULL;
njn1d0825f2006-03-27 11:37:07 +00004731 IRConst* c;
sewardj170ee212004-12-10 18:57:51 +00004732
4733 tyAddr = mce->hWordTy;
4734 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4735 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00004736 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00004737
sewardj95448072004-11-22 20:19:51 +00004738 if (data) {
4739 tl_assert(!vdata);
4740 tl_assert(isOriginalAtom(mce, data));
4741 tl_assert(bias == 0);
4742 vdata = expr2vbits( mce, data );
4743 } else {
4744 tl_assert(vdata);
4745 }
njn25e49d8e72002-09-23 09:36:25 +00004746
sewardj95448072004-11-22 20:19:51 +00004747 tl_assert(isOriginalAtom(mce,addr));
4748 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00004749
sewardj1c0ce7a2009-07-01 08:10:49 +00004750 if (guard) {
4751 tl_assert(isOriginalAtom(mce, guard));
4752 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4753 }
4754
4755 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00004756
njn1d0825f2006-03-27 11:37:07 +00004757 // If we're not doing undefined value checking, pretend that this value
4758 // is "all valid". That lets Vex's optimiser remove some of the V bit
4759 // shadow computation ops that precede it.
sewardj7cf4e6b2008-05-01 20:24:26 +00004760 if (MC_(clo_mc_level) == 1) {
njn1d0825f2006-03-27 11:37:07 +00004761 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00004762 case Ity_V256: // V256 weirdness -- used four times
sewardjbd43bfa2012-06-29 15:29:37 +00004763 c = IRConst_V256(V_BITS32_DEFINED); break;
sewardj45fa9f42012-05-21 10:18:10 +00004764 case Ity_V128: // V128 weirdness -- used twice
sewardj1c0ce7a2009-07-01 08:10:49 +00004765 c = IRConst_V128(V_BITS16_DEFINED); break;
njn1d0825f2006-03-27 11:37:07 +00004766 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
4767 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
4768 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
4769 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
4770 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4771 }
4772 vdata = IRExpr_Const( c );
4773 }
4774
sewardj95448072004-11-22 20:19:51 +00004775 /* First, emit a definedness test for the address. This also sets
sewardjb9e6d242013-05-11 13:42:08 +00004776 the address (shadow) to 'defined' following the test. Both of
4777 those actions are gated on |guard|. */
4778 complainIfUndefined( mce, addr, guard );
njn25e49d8e72002-09-23 09:36:25 +00004779
sewardj170ee212004-12-10 18:57:51 +00004780 /* Now decide which helper function to call to write the data V
4781 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00004782 if (end == Iend_LE) {
4783 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00004784 case Ity_V256: /* we'll use the helper four times */
sewardj2e595852005-06-30 23:33:37 +00004785 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00004786 case Ity_I64: helper = &MC_(helperc_STOREV64le);
4787 hname = "MC_(helperc_STOREV64le)";
sewardj2e595852005-06-30 23:33:37 +00004788 break;
njn1d0825f2006-03-27 11:37:07 +00004789 case Ity_I32: helper = &MC_(helperc_STOREV32le);
4790 hname = "MC_(helperc_STOREV32le)";
sewardj2e595852005-06-30 23:33:37 +00004791 break;
njn1d0825f2006-03-27 11:37:07 +00004792 case Ity_I16: helper = &MC_(helperc_STOREV16le);
4793 hname = "MC_(helperc_STOREV16le)";
sewardj2e595852005-06-30 23:33:37 +00004794 break;
njn1d0825f2006-03-27 11:37:07 +00004795 case Ity_I8: helper = &MC_(helperc_STOREV8);
4796 hname = "MC_(helperc_STOREV8)";
sewardj2e595852005-06-30 23:33:37 +00004797 break;
4798 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4799 }
4800 } else {
sewardj8cf88b72005-07-08 01:29:33 +00004801 switch (ty) {
4802 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00004803 case Ity_I64: helper = &MC_(helperc_STOREV64be);
4804 hname = "MC_(helperc_STOREV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00004805 break;
njn1d0825f2006-03-27 11:37:07 +00004806 case Ity_I32: helper = &MC_(helperc_STOREV32be);
4807 hname = "MC_(helperc_STOREV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00004808 break;
njn1d0825f2006-03-27 11:37:07 +00004809 case Ity_I16: helper = &MC_(helperc_STOREV16be);
4810 hname = "MC_(helperc_STOREV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00004811 break;
njn1d0825f2006-03-27 11:37:07 +00004812 case Ity_I8: helper = &MC_(helperc_STOREV8);
4813 hname = "MC_(helperc_STOREV8)";
sewardj8cf88b72005-07-08 01:29:33 +00004814 break;
sewardj45fa9f42012-05-21 10:18:10 +00004815 /* Note, no V256 case here, because no big-endian target that
4816 we support, has 256 vectors. */
sewardj8cf88b72005-07-08 01:29:33 +00004817 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4818 }
sewardj95448072004-11-22 20:19:51 +00004819 }
njn25e49d8e72002-09-23 09:36:25 +00004820
sewardj45fa9f42012-05-21 10:18:10 +00004821 if (UNLIKELY(ty == Ity_V256)) {
4822
4823 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4824 Q3 being the most significant lane. */
4825 /* These are the offsets of the Qs in memory. */
4826 Int offQ0, offQ1, offQ2, offQ3;
4827
4828 /* Various bits for constructing the 4 lane helper calls */
4829 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
4830 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
4831 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4832 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4833
4834 if (end == Iend_LE) {
4835 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4836 } else {
4837 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4838 }
4839
4840 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4841 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4842 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4843 diQ0 = unsafeIRDirty_0_N(
4844 1/*regparms*/,
4845 hname, VG_(fnptr_to_fnentry)( helper ),
4846 mkIRExprVec_2( addrQ0, vdataQ0 )
4847 );
4848
4849 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4850 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4851 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4852 diQ1 = unsafeIRDirty_0_N(
4853 1/*regparms*/,
4854 hname, VG_(fnptr_to_fnentry)( helper ),
4855 mkIRExprVec_2( addrQ1, vdataQ1 )
4856 );
4857
4858 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4859 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4860 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4861 diQ2 = unsafeIRDirty_0_N(
4862 1/*regparms*/,
4863 hname, VG_(fnptr_to_fnentry)( helper ),
4864 mkIRExprVec_2( addrQ2, vdataQ2 )
4865 );
4866
4867 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4868 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4869 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4870 diQ3 = unsafeIRDirty_0_N(
4871 1/*regparms*/,
4872 hname, VG_(fnptr_to_fnentry)( helper ),
4873 mkIRExprVec_2( addrQ3, vdataQ3 )
4874 );
4875
4876 if (guard)
4877 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4878
4879 setHelperAnns( mce, diQ0 );
4880 setHelperAnns( mce, diQ1 );
4881 setHelperAnns( mce, diQ2 );
4882 setHelperAnns( mce, diQ3 );
4883 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4884 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4885 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4886 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4887
4888 }
4889 else if (UNLIKELY(ty == Ity_V128)) {
sewardj170ee212004-12-10 18:57:51 +00004890
sewardj20d38f22005-02-07 23:50:18 +00004891 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00004892 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00004893 /* also, need to be careful about endianness */
4894
njn4c245e52009-03-15 23:25:38 +00004895 Int offLo64, offHi64;
4896 IRDirty *diLo64, *diHi64;
4897 IRAtom *addrLo64, *addrHi64;
4898 IRAtom *vdataLo64, *vdataHi64;
4899 IRAtom *eBiasLo64, *eBiasHi64;
4900
sewardj2e595852005-06-30 23:33:37 +00004901 if (end == Iend_LE) {
4902 offLo64 = 0;
4903 offHi64 = 8;
4904 } else {
sewardj2e595852005-06-30 23:33:37 +00004905 offLo64 = 8;
4906 offHi64 = 0;
4907 }
4908
4909 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00004910 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4911 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00004912 diLo64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004913 1/*regparms*/,
4914 hname, VG_(fnptr_to_fnentry)( helper ),
4915 mkIRExprVec_2( addrLo64, vdataLo64 )
4916 );
sewardj2e595852005-06-30 23:33:37 +00004917 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
sewardj7cf4e6b2008-05-01 20:24:26 +00004918 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4919 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00004920 diHi64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004921 1/*regparms*/,
4922 hname, VG_(fnptr_to_fnentry)( helper ),
4923 mkIRExprVec_2( addrHi64, vdataHi64 )
4924 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004925 if (guard) diLo64->guard = guard;
4926 if (guard) diHi64->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00004927 setHelperAnns( mce, diLo64 );
4928 setHelperAnns( mce, diHi64 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004929 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4930 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
sewardj170ee212004-12-10 18:57:51 +00004931
sewardj95448072004-11-22 20:19:51 +00004932 } else {
sewardj170ee212004-12-10 18:57:51 +00004933
njn4c245e52009-03-15 23:25:38 +00004934 IRDirty *di;
4935 IRAtom *addrAct;
4936
sewardj170ee212004-12-10 18:57:51 +00004937 /* 8/16/32/64-bit cases */
4938 /* Generate the actual address into addrAct. */
4939 if (bias == 0) {
4940 addrAct = addr;
4941 } else {
njn4c245e52009-03-15 23:25:38 +00004942 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00004943 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
sewardj170ee212004-12-10 18:57:51 +00004944 }
4945
4946 if (ty == Ity_I64) {
4947 /* We can't do this with regparm 2 on 32-bit platforms, since
4948 the back ends aren't clever enough to handle 64-bit
4949 regparm args. Therefore be different. */
4950 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004951 1/*regparms*/,
4952 hname, VG_(fnptr_to_fnentry)( helper ),
4953 mkIRExprVec_2( addrAct, vdata )
4954 );
sewardj170ee212004-12-10 18:57:51 +00004955 } else {
4956 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004957 2/*regparms*/,
4958 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj170ee212004-12-10 18:57:51 +00004959 mkIRExprVec_2( addrAct,
sewardj53ee1fc2005-12-23 02:29:58 +00004960 zwidenToHostWord( mce, vdata ))
4961 );
sewardj170ee212004-12-10 18:57:51 +00004962 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004963 if (guard) di->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00004964 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00004965 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00004966 }
njn25e49d8e72002-09-23 09:36:25 +00004967
sewardj95448072004-11-22 20:19:51 +00004968}
njn25e49d8e72002-09-23 09:36:25 +00004969
njn25e49d8e72002-09-23 09:36:25 +00004970
sewardj95448072004-11-22 20:19:51 +00004971/* Do lazy pessimistic propagation through a dirty helper call, by
4972 looking at the annotations on it. This is the most complex part of
4973 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004974
sewardj95448072004-11-22 20:19:51 +00004975static IRType szToITy ( Int n )
4976{
4977 switch (n) {
4978 case 1: return Ity_I8;
4979 case 2: return Ity_I16;
4980 case 4: return Ity_I32;
4981 case 8: return Ity_I64;
4982 default: VG_(tool_panic)("szToITy(memcheck)");
4983 }
4984}
njn25e49d8e72002-09-23 09:36:25 +00004985
sewardj95448072004-11-22 20:19:51 +00004986static
4987void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4988{
sewardj2eecb742012-06-01 16:11:41 +00004989 Int i, k, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00004990 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00004991 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00004992 IRTemp dst;
4993 IREndness end;
4994
4995 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00004996# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00004997 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00004998# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00004999 end = Iend_LE;
5000# else
5001# error "Unknown endianness"
5002# endif
njn25e49d8e72002-09-23 09:36:25 +00005003
sewardj95448072004-11-22 20:19:51 +00005004 /* First check the guard. */
sewardjb9e6d242013-05-11 13:42:08 +00005005 complainIfUndefined(mce, d->guard, NULL);
sewardj95448072004-11-22 20:19:51 +00005006
5007 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00005008 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00005009
florian434ffae2012-07-19 17:23:42 +00005010 /* Inputs: unmasked args
5011 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj95448072004-11-22 20:19:51 +00005012 for (i = 0; d->args[i]; i++) {
sewardj21a5f8c2013-08-08 10:41:46 +00005013 IRAtom* arg = d->args[i];
5014 if ( (d->cee->mcx_mask & (1<<i))
floriana5c3ecb2013-08-15 20:55:42 +00005015 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
sewardj95448072004-11-22 20:19:51 +00005016 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00005017 } else {
sewardj21a5f8c2013-08-08 10:41:46 +00005018 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
sewardj95448072004-11-22 20:19:51 +00005019 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00005020 }
5021 }
sewardj95448072004-11-22 20:19:51 +00005022
5023 /* Inputs: guest state that we read. */
5024 for (i = 0; i < d->nFxState; i++) {
5025 tl_assert(d->fxState[i].fx != Ifx_None);
5026 if (d->fxState[i].fx == Ifx_Write)
5027 continue;
sewardja7203252004-11-26 19:17:47 +00005028
sewardj2eecb742012-06-01 16:11:41 +00005029 /* Enumerate the described state segments */
5030 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5031 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5032 gSz = d->fxState[i].size;
sewardja7203252004-11-26 19:17:47 +00005033
sewardj2eecb742012-06-01 16:11:41 +00005034 /* Ignore any sections marked as 'always defined'. */
5035 if (isAlwaysDefd(mce, gOff, gSz)) {
5036 if (0)
5037 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5038 gOff, gSz);
5039 continue;
5040 }
sewardje9e16d32004-12-10 13:17:55 +00005041
sewardj2eecb742012-06-01 16:11:41 +00005042 /* This state element is read or modified. So we need to
5043 consider it. If larger than 8 bytes, deal with it in
5044 8-byte chunks. */
5045 while (True) {
5046 tl_assert(gSz >= 0);
5047 if (gSz == 0) break;
5048 n = gSz <= 8 ? gSz : 8;
5049 /* update 'curr' with UifU of the state slice
5050 gOff .. gOff+n-1 */
5051 tySrc = szToITy( n );
florian434ffae2012-07-19 17:23:42 +00005052
5053 /* Observe the guard expression. If it is false use an
5054 all-bits-defined bit pattern */
5055 IRAtom *cond, *iffalse, *iftrue;
5056
sewardjcc961652013-01-26 11:49:15 +00005057 cond = assignNew('V', mce, Ity_I1, d->guard);
florian434ffae2012-07-19 17:23:42 +00005058 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5059 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5060 src = assignNew('V', mce, tySrc,
florian5686b2d2013-01-29 03:57:40 +00005061 IRExpr_ITE(cond, iftrue, iffalse));
florian434ffae2012-07-19 17:23:42 +00005062
sewardj2eecb742012-06-01 16:11:41 +00005063 here = mkPCastTo( mce, Ity_I32, src );
5064 curr = mkUifU32(mce, here, curr);
5065 gSz -= n;
5066 gOff += n;
5067 }
5068 }
sewardj95448072004-11-22 20:19:51 +00005069 }
5070
5071 /* Inputs: memory. First set up some info needed regardless of
5072 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00005073
5074 if (d->mFx != Ifx_None) {
5075 /* Because we may do multiple shadow loads/stores from the same
5076 base address, it's best to do a single test of its
5077 definedness right now. Post-instrumentation optimisation
5078 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00005079 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00005080 tl_assert(d->mAddr);
sewardjb9e6d242013-05-11 13:42:08 +00005081 complainIfUndefined(mce, d->mAddr, d->guard);
sewardj95448072004-11-22 20:19:51 +00005082
sewardj1c0ce7a2009-07-01 08:10:49 +00005083 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00005084 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5085 tl_assert(tyAddr == mce->hWordTy); /* not really right */
5086 }
5087
5088 /* Deal with memory inputs (reads or modifies) */
5089 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00005090 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00005091 /* chew off 32-bit chunks. We don't care about the endianness
5092 since it's all going to be condensed down to a single bit,
5093 but nevertheless choose an endianness which is hopefully
5094 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00005095 while (toDo >= 4) {
5096 here = mkPCastTo(
5097 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005098 expr2vbits_Load_guarded_Simple(
5099 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
sewardj95448072004-11-22 20:19:51 +00005100 );
5101 curr = mkUifU32(mce, here, curr);
5102 toDo -= 4;
5103 }
5104 /* chew off 16-bit chunks */
5105 while (toDo >= 2) {
5106 here = mkPCastTo(
5107 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005108 expr2vbits_Load_guarded_Simple(
5109 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
sewardj95448072004-11-22 20:19:51 +00005110 );
5111 curr = mkUifU32(mce, here, curr);
5112 toDo -= 2;
5113 }
floriancda994b2012-06-08 16:01:19 +00005114 /* chew off the remaining 8-bit chunk, if any */
5115 if (toDo == 1) {
5116 here = mkPCastTo(
5117 mce, Ity_I32,
sewardjcafe5052013-01-17 14:24:35 +00005118 expr2vbits_Load_guarded_Simple(
5119 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
floriancda994b2012-06-08 16:01:19 +00005120 );
5121 curr = mkUifU32(mce, here, curr);
5122 toDo -= 1;
5123 }
5124 tl_assert(toDo == 0);
sewardj95448072004-11-22 20:19:51 +00005125 }
5126
5127 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5128 all the inputs to the helper. Now we need to re-distribute the
5129 results to all destinations. */
5130
5131 /* Outputs: the destination temporary, if there is one. */
5132 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005133 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00005134 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00005135 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00005136 }
5137
5138 /* Outputs: guest state that we write or modify. */
5139 for (i = 0; i < d->nFxState; i++) {
5140 tl_assert(d->fxState[i].fx != Ifx_None);
5141 if (d->fxState[i].fx == Ifx_Read)
5142 continue;
sewardj2eecb742012-06-01 16:11:41 +00005143
5144 /* Enumerate the described state segments */
5145 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5146 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5147 gSz = d->fxState[i].size;
5148
5149 /* Ignore any sections marked as 'always defined'. */
5150 if (isAlwaysDefd(mce, gOff, gSz))
5151 continue;
5152
5153 /* This state element is written or modified. So we need to
5154 consider it. If larger than 8 bytes, deal with it in
5155 8-byte chunks. */
5156 while (True) {
5157 tl_assert(gSz >= 0);
5158 if (gSz == 0) break;
5159 n = gSz <= 8 ? gSz : 8;
5160 /* Write suitably-casted 'curr' to the state slice
5161 gOff .. gOff+n-1 */
5162 tyDst = szToITy( n );
5163 do_shadow_PUT( mce, gOff,
5164 NULL, /* original atom */
florian434ffae2012-07-19 17:23:42 +00005165 mkPCastTo( mce, tyDst, curr ), d->guard );
sewardj2eecb742012-06-01 16:11:41 +00005166 gSz -= n;
5167 gOff += n;
5168 }
sewardje9e16d32004-12-10 13:17:55 +00005169 }
sewardj95448072004-11-22 20:19:51 +00005170 }
5171
sewardj2e595852005-06-30 23:33:37 +00005172 /* Outputs: memory that we write or modify. Same comments about
5173 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00005174 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00005175 toDo = d->mSize;
5176 /* chew off 32-bit chunks */
5177 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00005178 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5179 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00005180 mkPCastTo( mce, Ity_I32, curr ),
florian434ffae2012-07-19 17:23:42 +00005181 d->guard );
sewardj95448072004-11-22 20:19:51 +00005182 toDo -= 4;
5183 }
5184 /* chew off 16-bit chunks */
5185 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00005186 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5187 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00005188 mkPCastTo( mce, Ity_I16, curr ),
florian434ffae2012-07-19 17:23:42 +00005189 d->guard );
sewardj95448072004-11-22 20:19:51 +00005190 toDo -= 2;
5191 }
floriancda994b2012-06-08 16:01:19 +00005192 /* chew off the remaining 8-bit chunk, if any */
5193 if (toDo == 1) {
5194 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5195 NULL, /* original data */
5196 mkPCastTo( mce, Ity_I8, curr ),
florian434ffae2012-07-19 17:23:42 +00005197 d->guard );
floriancda994b2012-06-08 16:01:19 +00005198 toDo -= 1;
5199 }
5200 tl_assert(toDo == 0);
sewardj95448072004-11-22 20:19:51 +00005201 }
5202
njn25e49d8e72002-09-23 09:36:25 +00005203}
5204
sewardj1c0ce7a2009-07-01 08:10:49 +00005205
sewardj826ec492005-05-12 18:05:00 +00005206/* We have an ABI hint telling us that [base .. base+len-1] is to
5207 become undefined ("writable"). Generate code to call a helper to
5208 notify the A/V bit machinery of this fact.
5209
5210 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00005211 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5212 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00005213*/
5214static
sewardj7cf4e6b2008-05-01 20:24:26 +00005215void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00005216{
5217 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00005218 /* Minor optimisation: if not doing origin tracking, ignore the
5219 supplied nia and pass zero instead. This is on the basis that
5220 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5221 almost always generate a shorter instruction to put zero into a
5222 register than any other value. */
5223 if (MC_(clo_mc_level) < 3)
5224 nia = mkIRExpr_HWord(0);
5225
sewardj826ec492005-05-12 18:05:00 +00005226 di = unsafeIRDirty_0_N(
5227 0/*regparms*/,
5228 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00005229 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00005230 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00005231 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005232 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00005233}
5234
njn25e49d8e72002-09-23 09:36:25 +00005235
sewardj1c0ce7a2009-07-01 08:10:49 +00005236/* ------ Dealing with IRCAS (big and complex) ------ */
5237
5238/* FWDS */
5239static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5240 IRAtom* baseaddr, Int offset );
5241static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5242static void gen_store_b ( MCEnv* mce, Int szB,
5243 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5244 IRAtom* guard );
5245
5246static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5247static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5248
5249
5250/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5251 IRExpr.Consts, else this asserts. If they are both Consts, it
5252 doesn't do anything. So that just leaves the RdTmp case.
5253
5254 In which case: this assigns the shadow value SHADOW to the IR
5255 shadow temporary associated with ORIG. That is, ORIG, being an
5256 original temporary, will have a shadow temporary associated with
5257 it. However, in the case envisaged here, there will so far have
5258 been no IR emitted to actually write a shadow value into that
5259 temporary. What this routine does is to (emit IR to) copy the
5260 value in SHADOW into said temporary, so that after this call,
5261 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5262 value in SHADOW.
5263
5264 Point is to allow callers to compute "by hand" a shadow value for
5265 ORIG, and force it to be associated with ORIG.
5266
5267 How do we know that that shadow associated with ORIG has not so far
5268 been assigned to? Well, we don't per se know that, but supposing
5269 it had. Then this routine would create a second assignment to it,
5270 and later the IR sanity checker would barf. But that never
5271 happens. QED.
5272*/
5273static void bind_shadow_tmp_to_orig ( UChar how,
5274 MCEnv* mce,
5275 IRAtom* orig, IRAtom* shadow )
5276{
5277 tl_assert(isOriginalAtom(mce, orig));
5278 tl_assert(isShadowAtom(mce, shadow));
5279 switch (orig->tag) {
5280 case Iex_Const:
5281 tl_assert(shadow->tag == Iex_Const);
5282 break;
5283 case Iex_RdTmp:
5284 tl_assert(shadow->tag == Iex_RdTmp);
5285 if (how == 'V') {
5286 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5287 shadow);
5288 } else {
5289 tl_assert(how == 'B');
5290 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5291 shadow);
5292 }
5293 break;
5294 default:
5295 tl_assert(0);
5296 }
5297}
5298
5299
/* Instrument an IRCAS statement.  Dispatches to the single- or
   double-element case; the overall scheme and the rationale for the
   Iop_CasCmpEQ ops are described in the comment below. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

         1. fetch data#,dataB (the proposed new value)

         2. fetch expd#,expdB (what we expect to see at the address)

         3. check definedness of address

         4. load old#,oldB from shadow memory; this also checks
            addressability of the address

         5. the CAS itself

         6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

         7. if "expected == old" (as computed by (6))
               store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* A double CAS is flagged by oldHi being a valid temp. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
5432
5433
/* Instrument a single-element CAS, following steps 1..7 of the scheme
   laid out in the big comment in do_shadow_CAS.  'v*' locals are
   V-bit (definedness) shadows; 'b*' locals are origin shadows, and
   are only computed when origin tracking is enabled (otrak). */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* Derive element size and the special always-defined comparison op
      from the element type.  See COMMENT_ON_CasCmpEQ for why the
      CasCmpEQ variants are used rather than plain CmpEQ. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make cas->oldLo's shadow temp carry the loaded shadow value. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   /* The shadow stores are guarded by expd_eq_old, so they happen
      exactly when the real CAS succeeded. */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5522
5523
/* Instrument a double-element CAS, following steps 1..7 of the scheme
   laid out in the big comment in do_shadow_CAS.  Same conventions as
   do_shadow_CAS_single ('v*' = V-bit shadows, 'b*' = origin shadows),
   but everything is done for both the Hi and Lo halves, and the
   success test ORs together the XORs of both halves. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   /* Derive per-element size, a zero constant, and the ops needed for
      the combined equality test, from the element type. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   /* Byte offsets of the Hi and Lo halves within the 2*elemSzB
      transaction depend on endianness: for little-endian the Lo half
      is at the lower address, for big-endian the Hi half is. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make cas->oldHi/oldLo's shadow temps carry the loaded shadows. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   /* Both halves' shadow stores are guarded by expd_eq_old, so they
      happen exactly when the real CAS succeeded. */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5684
5685
sewardjdb5907d2009-11-26 17:20:21 +00005686/* ------ Dealing with LL/SC (not difficult) ------ */
5687
5688static void do_shadow_LLSC ( MCEnv* mce,
5689 IREndness stEnd,
5690 IRTemp stResult,
5691 IRExpr* stAddr,
5692 IRExpr* stStoredata )
5693{
5694 /* In short: treat a load-linked like a normal load followed by an
5695 assignment of the loaded (shadow) data to the result temporary.
5696 Treat a store-conditional like a normal store, and mark the
5697 result temporary as defined. */
5698 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5699 IRTemp resTmp = findShadowTmpV(mce, stResult);
5700
5701 tl_assert(isIRAtom(stAddr));
5702 if (stStoredata)
5703 tl_assert(isIRAtom(stStoredata));
5704
5705 if (stStoredata == NULL) {
5706 /* Load Linked */
5707 /* Just treat this as a normal load, followed by an assignment of
5708 the value to .result. */
5709 /* Stay sane */
5710 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5711 || resTy == Ity_I16 || resTy == Ity_I8);
5712 assign( 'V', mce, resTmp,
5713 expr2vbits_Load(
sewardjcafe5052013-01-17 14:24:35 +00005714 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
5715 NULL/*always happens*/) );
sewardjdb5907d2009-11-26 17:20:21 +00005716 } else {
5717 /* Store Conditional */
5718 /* Stay sane */
5719 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5720 stStoredata);
5721 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5722 || dataTy == Ity_I16 || dataTy == Ity_I8);
5723 do_shadow_Store( mce, stEnd,
5724 stAddr, 0/* addr bias */,
5725 stStoredata,
5726 NULL /* shadow data */,
5727 NULL/*guard*/ );
5728 /* This is a store conditional, so it writes to .result a value
5729 indicating whether or not the store succeeded. Just claim
5730 this value is always defined. In the PowerPC interpretation
5731 of store-conditional, definedness of the success indication
5732 depends on whether the address of the store matches the
5733 reservation address. But we can't tell that here (and
5734 anyway, we're not being PowerPC-specific). At least we are
5735 guaranteed that the definedness of the store address, and its
5736 addressibility, will be checked as per normal. So it seems
5737 pretty safe to just say that the success indication is always
5738 defined.
5739
5740 In schemeS, for origin tracking, we must correspondingly set
5741 a no-origin value for the origin shadow of .result.
5742 */
5743 tl_assert(resTy == Ity_I1);
5744 assign( 'V', mce, resTmp, definedOfType(resTy) );
5745 }
5746}
5747
5748
sewardjcafe5052013-01-17 14:24:35 +00005749/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
5750
5751static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
5752{
sewardjb9e6d242013-05-11 13:42:08 +00005753 complainIfUndefined(mce, sg->guard, NULL);
5754 /* do_shadow_Store will generate code to check the definedness and
5755 validity of sg->addr, in the case where sg->guard evaluates to
5756 True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005757 do_shadow_Store( mce, sg->end,
5758 sg->addr, 0/* addr bias */,
5759 sg->data,
5760 NULL /* shadow data */,
5761 sg->guard );
5762}
5763
5764static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
5765{
sewardjb9e6d242013-05-11 13:42:08 +00005766 complainIfUndefined(mce, lg->guard, NULL);
5767 /* expr2vbits_Load_guarded_General will generate code to check the
5768 definedness and validity of lg->addr, in the case where
5769 lg->guard evaluates to True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00005770
5771 /* Look at the LoadG's built-in conversion operation, to determine
5772 the source (actual loaded data) type, and the equivalent IROp.
5773 NOTE that implicitly we are taking a widening operation to be
5774 applied to original atoms and producing one that applies to V
5775 bits. Since signed and unsigned widening are self-shadowing,
5776 this is a straight copy of the op (modulo swapping from the
5777 IRLoadGOp form to the IROp form). Note also therefore that this
5778 implicitly duplicates the logic to do with said widening ops in
5779 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
5780 IROp vwiden = Iop_INVALID;
5781 IRType loadedTy = Ity_INVALID;
5782 switch (lg->cvt) {
5783 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
5784 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
5785 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
5786 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
5787 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
5788 default: VG_(tool_panic)("do_shadow_LoadG");
5789 }
5790
5791 IRAtom* vbits_alt
5792 = expr2vbits( mce, lg->alt );
5793 IRAtom* vbits_final
5794 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
5795 lg->addr, 0/*addr bias*/,
5796 lg->guard, vwiden, vbits_alt );
5797 /* And finally, bind the V bits to the destination temporary. */
5798 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
5799}
5800
5801
sewardj95448072004-11-22 20:19:51 +00005802/*------------------------------------------------------------*/
5803/*--- Memcheck main ---*/
5804/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005805
sewardj7cf4e6b2008-05-01 20:24:26 +00005806static void schemeS ( MCEnv* mce, IRStmt* st );
5807
sewardj95448072004-11-22 20:19:51 +00005808static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005809{
sewardj95448072004-11-22 20:19:51 +00005810 ULong n = 0;
5811 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005812 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005813 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005814 return False;
5815 tl_assert(at->tag == Iex_Const);
5816 con = at->Iex.Const.con;
5817 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005818 case Ico_U1: return False;
5819 case Ico_U8: n = (ULong)con->Ico.U8; break;
5820 case Ico_U16: n = (ULong)con->Ico.U16; break;
5821 case Ico_U32: n = (ULong)con->Ico.U32; break;
5822 case Ico_U64: n = (ULong)con->Ico.U64; break;
5823 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005824 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005825 case Ico_F64i: return False;
5826 case Ico_V128: return False;
sewardj1eb272f2014-01-26 18:36:52 +00005827 case Ico_V256: return False;
sewardj95448072004-11-22 20:19:51 +00005828 default: ppIRExpr(at); tl_assert(0);
5829 }
5830 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005831 return (/*32*/ n == 0xFEFEFEFFULL
5832 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005833 /*32*/ || n == 0x7F7F7F7FULL
sewardja150fe92013-12-11 16:49:46 +00005834 /*32*/ || n == 0x7EFEFEFFULL
5835 /*32*/ || n == 0x81010100ULL
tomd9774d72005-06-27 08:11:01 +00005836 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005837 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005838 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005839 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005840 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005841 );
sewardj95448072004-11-22 20:19:51 +00005842}
njn25e49d8e72002-09-23 09:36:25 +00005843
/* Return True iff the (flat) statement ST mentions any "bogus"
   literal, as judged by isBogusAtom.  Used in a pre-scan over a
   superblock to decide whether to enable the extra-expensive
   instrumentation in places. */
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         /* Dispatch on the expression written to the temp; for each
            form, check every constituent atom. */
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               /* Iop_GetMSBs8x16 is also treated as a trigger for
                  expensive instrumentation, independent of its arg. */
               return isBogusAtom(e->Iex.Unop.arg)
                      || e->Iex.Unop.op == Iop_GetMSBs8x16;
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.details->arg1)
                      || isBogusAtom(e->Iex.Triop.details->arg2)
                      || isBogusAtom(e->Iex.Triop.details->arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.details->arg1)
                      || isBogusAtom(e->Iex.Qop.details->arg2)
                      || isBogusAtom(e->Iex.Qop.details->arg3)
                      || isBogusAtom(e->Iex.Qop.details->arg4);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               /* args is a NULL-terminated vector. */
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++) {
            IRAtom* atom = d->args[i];
            /* VECRET/BBPTR marker args are not real atoms; skip them. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) {
               if (isBogusAtom(atom))
                  return True;
            }
         }
         if (isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.details->ix)
                || isBogusAtom(st->Ist.PutI.details->data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
                || isBogusAtom(sg->guard);
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
                || isBogusAtom(lg->guard);
      }
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base)
                || isBogusAtom(st->Ist.AbiHint.nia);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No atoms to inspect. */
         return False;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS. */
         cas = st->Ist.CAS.details;
         return isBogusAtom(cas->addr)
                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
                || isBogusAtom(cas->expdLo)
                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
                || isBogusAtom(cas->dataLo);
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         return isBogusAtom(st->Ist.LLSC.addr)
                || (st->Ist.LLSC.storedata
                       ? isBogusAtom(st->Ist.LLSC.storedata)
                       : False);
      default:
      unhandled:
         ppIRStmt(st);
         /* NOTE(review): message uses the function's apparent older
            name "hasBogusLiterals", not "checkForBogusLiterals". */
         VG_(tool_panic)("hasBogusLiterals");
   }
}
njn25e49d8e72002-09-23 09:36:25 +00005949
njn25e49d8e72002-09-23 09:36:25 +00005950
/* Top-level memcheck instrumentation entry point.  Produce a new
   superblock SB_OUT which performs the same computation as SB_IN,
   augmented with definedness (V-bit) shadow computation and, when
   MC_(clo_mc_level) == 3, origin (B-bit) tracking.  The returned
   block is a fresh copy; SB_IN's statements are not modified. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything except the statements, which are
      re-emitted (with instrumentation) below. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb            = sb_out;
   mce.trace         = verboze;
   mce.layout        = layout;
   mce.hWordTy       = hWordTy;
   mce.bogusLiterals = False;

   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
      Darwin.  10.7 is mostly built with LLVM, which uses these for
      bitfield inserts, and we get a lot of false errors if the cheap
      interpretation is used, alas.  Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   /* Seed the tmp map: one Orig entry per pre-existing temp, with
      both shadows unallocated (allocated lazily on first use). */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in the
         output, for the verbose dump below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_StoreG:
            do_shadow_StoreG( &mce, st->Ist.StoreG.details );
            break;

         case Ist_LoadG:
            do_shadow_LoadG( &mce, st->Ist.LoadG.details );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next, NULL );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00006258
sewardj81651dc2007-08-28 06:05:20 +00006259/*------------------------------------------------------------*/
6260/*--- Post-tree-build final tidying ---*/
6261/*------------------------------------------------------------*/
6262
6263/* This exploits the observation that Memcheck often produces
6264 repeated conditional calls of the form
6265
sewardj7cf4e6b2008-05-01 20:24:26 +00006266 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00006267
6268 with the same guard expression G guarding the same helper call.
6269 The second and subsequent calls are redundant. This usually
6270 results from instrumentation of guest code containing multiple
6271 memory references at different constant offsets from the same base
6272 register. After optimisation of the instrumentation, you get a
6273 test for the definedness of the base register for each memory
6274 reference, which is kinda pointless. MC_(final_tidy) therefore
6275 looks for such repeated calls and removes all but the first. */
6276
6277/* A struct for recording which (helper, guard) pairs we have already
6278 seen. */
6279typedef
6280 struct { void* entry; IRExpr* guard; }
6281 Pair;
6282
6283/* Return True if e1 and e2 definitely denote the same value (used to
6284 compare guards). Return False if unknown; False is the safe
6285 answer. Since guest registers and guest memory do not have the
6286 SSA property we must return False if any Gets or Loads appear in
6287 the expression. */
6288
6289static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6290{
6291 if (e1->tag != e2->tag)
6292 return False;
6293 switch (e1->tag) {
6294 case Iex_Const:
6295 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6296 case Iex_Binop:
6297 return e1->Iex.Binop.op == e2->Iex.Binop.op
6298 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6299 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6300 case Iex_Unop:
6301 return e1->Iex.Unop.op == e2->Iex.Unop.op
6302 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6303 case Iex_RdTmp:
6304 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
florian5686b2d2013-01-29 03:57:40 +00006305 case Iex_ITE:
6306 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6307 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6308 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
sewardj81651dc2007-08-28 06:05:20 +00006309 case Iex_Qop:
6310 case Iex_Triop:
6311 case Iex_CCall:
6312 /* be lazy. Could define equality for these, but they never
6313 appear to be used. */
6314 return False;
6315 case Iex_Get:
6316 case Iex_GetI:
6317 case Iex_Load:
6318 /* be conservative - these may not give the same value each
6319 time */
6320 return False;
6321 case Iex_Binder:
6322 /* should never see this */
6323 /* fallthrough */
6324 default:
6325 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6326 ppIRExpr(e1);
6327 VG_(tool_panic)("memcheck:sameIRValue");
6328 return False;
6329 }
6330}
6331
6332/* See if 'pairs' already has an entry for (entry, guard). Return
6333 True if so. If not, add an entry. */
6334
6335static
6336Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6337{
6338 Pair p;
6339 Pair* pp;
6340 Int i, n = VG_(sizeXA)( pairs );
6341 for (i = 0; i < n; i++) {
6342 pp = VG_(indexXA)( pairs, i );
6343 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6344 return True;
6345 }
6346 p.guard = guard;
6347 p.entry = entry;
6348 VG_(addToXA)( pairs, &p );
6349 return False;
6350}
6351
florian11f3cc82012-10-21 02:19:35 +00006352static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00006353{
6354 return
sewardj7cf4e6b2008-05-01 20:24:26 +00006355 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6356 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6357 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6358 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6359 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6360 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6361 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6362 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00006363}
6364
/* Remove redundant repeated conditional calls to the value-check-fail
   helpers (see the explanatory comment above).  Operates in place on
   SB_IN and returns it. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   /* Set of (helper entry, guard) pairs seen so far in this block. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      tl_assert(guard);
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* "Delete" by overwriting with a no-op; the statement array
            is not physically shrunk. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
6404
6405
sewardj7cf4e6b2008-05-01 20:24:26 +00006406/*------------------------------------------------------------*/
6407/*--- Origin tracking stuff ---*/
6408/*------------------------------------------------------------*/
6409
/* Almost identical to findShadowTmpV.  Return the origin (B) shadow
   temp for ORIG, allocating and caching a new Ity_I32 temp on first
   use. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
6430
6431static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6432{
6433 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6434}
6435
sewardjcafe5052013-01-17 14:24:35 +00006436
/* Make a guarded origin load, with no special handling in the
   didn't-happen case.  A GUARD of NULL is assumed to mean "always
   True".

   Generate IR to do a shadow origins load from BASEADDR+OFFSET and
   return the otag.  The loaded size is SZB.  If GUARD evaluates to
   False at run time then the returned otag is zero.
*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*    hFun;
   const HChar* hName;
   IRTemp   bTmp;
   IRDirty* di;
   /* Address arithmetic is done at the width of the base address
      expression (32- or 64-bit). */
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* Helper result arrives in a host-word-sized temp; narrowed to
      32 bits below if on a 64-bit host. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the B-load helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertantly.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00006512
sewardjcafe5052013-01-17 14:24:35 +00006513
6514/* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6515 loaded size is SZB. The load is regarded as unconditional (always
6516 happens).
6517*/
6518static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6519 Int offset )
florian434ffae2012-07-19 17:23:42 +00006520{
sewardjcafe5052013-01-17 14:24:35 +00006521 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
florian434ffae2012-07-19 17:23:42 +00006522}
6523
sewardjcafe5052013-01-17 14:24:35 +00006524
6525/* The most general handler for guarded origin loads. A GUARD of NULL
6526 is assumed to mean "always True".
6527
6528 Generate IR to do a shadow origin load from ADDR+BIAS and return
6529 the B bits. The loaded type is TY. If GUARD evaluates to False at
6530 run time then the returned B bits are simply BALT instead.
6531*/
6532static
6533IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6534 IRType ty,
6535 IRAtom* addr, UInt bias,
6536 IRAtom* guard, IRAtom* balt )
6537{
6538 /* If the guard evaluates to True, this will hold the loaded
6539 origin. If the guard evaluates to False, this will be zero,
6540 meaning "unknown origin", in which case we will have to replace
florian5686b2d2013-01-29 03:57:40 +00006541 it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00006542 IRAtom* iftrue
6543 = assignNew('B', mce, Ity_I32,
6544 gen_guarded_load_b(mce, sizeofIRType(ty),
6545 addr, bias, guard));
6546 /* These are the bits we will return if the load doesn't take
6547 place. */
6548 IRAtom* iffalse
6549 = balt;
florian5686b2d2013-01-29 03:57:40 +00006550 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00006551 something that iropt knows how to fold out later. */
6552 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00006553 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00006554 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00006555 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00006556}
6557
6558
6559/* Generate a shadow origins store. guard :: Ity_I1 controls whether
6560 the store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00006561static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00006562 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6563 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00006564{
6565 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00006566 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00006567 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00006568 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00006569 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6570 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00006571 if (guard) {
6572 tl_assert(isOriginalAtom(mce, guard));
6573 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6574 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006575 if (offset != 0) {
6576 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6577 : mkU64( (Long)(Int)offset );
6578 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6579 }
6580 if (mce->hWordTy == Ity_I64)
6581 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6582
6583 switch (szB) {
6584 case 1: hFun = (void*)&MC_(helperc_b_store1);
6585 hName = "MC_(helperc_b_store1)";
6586 break;
6587 case 2: hFun = (void*)&MC_(helperc_b_store2);
6588 hName = "MC_(helperc_b_store2)";
6589 break;
6590 case 4: hFun = (void*)&MC_(helperc_b_store4);
6591 hName = "MC_(helperc_b_store4)";
6592 break;
6593 case 8: hFun = (void*)&MC_(helperc_b_store8);
6594 hName = "MC_(helperc_b_store8)";
6595 break;
6596 case 16: hFun = (void*)&MC_(helperc_b_store16);
6597 hName = "MC_(helperc_b_store16)";
6598 break;
sewardj45fa9f42012-05-21 10:18:10 +00006599 case 32: hFun = (void*)&MC_(helperc_b_store32);
6600 hName = "MC_(helperc_b_store32)";
6601 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00006602 default:
6603 tl_assert(0);
6604 }
6605 di = unsafeIRDirty_0_N( 2/*regparms*/,
6606 hName, VG_(fnptr_to_fnentry)( hFun ),
6607 mkIRExprVec_2( ea, dataB )
6608 );
6609 /* no need to mess with any annotations. This call accesses
6610 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00006611 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00006612 stmt( 'B', mce, IRStmt_Dirty(di) );
6613}
6614
6615static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006616 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006617 if (eTy == Ity_I64)
6618 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6619 if (eTy == Ity_I32)
6620 return e;
6621 tl_assert(0);
6622}
6623
6624static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006625 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006626 tl_assert(eTy == Ity_I32);
6627 if (dstTy == Ity_I64)
6628 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6629 tl_assert(0);
6630}
6631
sewardjdb5907d2009-11-26 17:20:21 +00006632
sewardj7cf4e6b2008-05-01 20:24:26 +00006633static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6634{
6635 tl_assert(MC_(clo_mc_level) == 3);
6636
6637 switch (e->tag) {
6638
6639 case Iex_GetI: {
6640 IRRegArray* descr_b;
6641 IRAtom *t1, *t2, *t3, *t4;
6642 IRRegArray* descr = e->Iex.GetI.descr;
6643 IRType equivIntTy
6644 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6645 /* If this array is unshadowable for whatever reason, use the
6646 usual approximation. */
6647 if (equivIntTy == Ity_INVALID)
6648 return mkU32(0);
6649 tl_assert(sizeofIRType(equivIntTy) >= 4);
6650 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6651 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6652 equivIntTy, descr->nElems );
6653 /* Do a shadow indexed get of the same size, giving t1. Take
6654 the bottom 32 bits of it, giving t2. Compute into t3 the
6655 origin for the index (almost certainly zero, but there's
6656 no harm in being completely general here, since iropt will
6657 remove any useless code), and fold it in, giving a final
6658 value t4. */
6659 t1 = assignNew( 'B', mce, equivIntTy,
6660 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6661 e->Iex.GetI.bias ));
6662 t2 = narrowTo32( mce, t1 );
6663 t3 = schemeE( mce, e->Iex.GetI.ix );
6664 t4 = gen_maxU32( mce, t2, t3 );
6665 return t4;
6666 }
6667 case Iex_CCall: {
6668 Int i;
6669 IRAtom* here;
6670 IRExpr** args = e->Iex.CCall.args;
6671 IRAtom* curr = mkU32(0);
6672 for (i = 0; args[i]; i++) {
6673 tl_assert(i < 32);
6674 tl_assert(isOriginalAtom(mce, args[i]));
6675 /* Only take notice of this arg if the callee's
6676 mc-exclusion mask does not say it is to be excluded. */
6677 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6678 /* the arg is to be excluded from definedness checking.
6679 Do nothing. */
6680 if (0) VG_(printf)("excluding %s(%d)\n",
6681 e->Iex.CCall.cee->name, i);
6682 } else {
6683 /* calculate the arg's definedness, and pessimistically
6684 merge it in. */
6685 here = schemeE( mce, args[i] );
6686 curr = gen_maxU32( mce, curr, here );
6687 }
6688 }
6689 return curr;
6690 }
6691 case Iex_Load: {
6692 Int dszB;
6693 dszB = sizeofIRType(e->Iex.Load.ty);
6694 /* assert that the B value for the address is already
6695 available (somewhere) */
6696 tl_assert(isIRAtom(e->Iex.Load.addr));
6697 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
6698 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
6699 }
florian5686b2d2013-01-29 03:57:40 +00006700 case Iex_ITE: {
6701 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
florian5686b2d2013-01-29 03:57:40 +00006702 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
sewardj07bfda22013-01-29 21:11:55 +00006703 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
sewardj7cf4e6b2008-05-01 20:24:26 +00006704 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
6705 }
6706 case Iex_Qop: {
floriane2ab2972012-06-01 20:43:03 +00006707 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
6708 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
6709 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
6710 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
sewardj7cf4e6b2008-05-01 20:24:26 +00006711 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
6712 gen_maxU32( mce, b3, b4 ) );
6713 }
6714 case Iex_Triop: {
florian26441742012-06-02 20:30:41 +00006715 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
6716 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
6717 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
sewardj7cf4e6b2008-05-01 20:24:26 +00006718 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6719 }
6720 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00006721 switch (e->Iex.Binop.op) {
6722 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
6723 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6724 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6725 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6726 /* Just say these all produce a defined result,
6727 regardless of their arguments. See
6728 COMMENT_ON_CasCmpEQ in this file. */
6729 return mkU32(0);
6730 default: {
6731 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6732 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6733 return gen_maxU32( mce, b1, b2 );
6734 }
6735 }
6736 tl_assert(0);
6737 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00006738 }
6739 case Iex_Unop: {
6740 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6741 return b1;
6742 }
6743 case Iex_Const:
6744 return mkU32(0);
6745 case Iex_RdTmp:
6746 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6747 case Iex_Get: {
6748 Int b_offset = MC_(get_otrack_shadow_offset)(
6749 e->Iex.Get.offset,
6750 sizeofIRType(e->Iex.Get.ty)
6751 );
6752 tl_assert(b_offset >= -1
6753 && b_offset <= mce->layout->total_sizeB -4);
6754 if (b_offset >= 0) {
6755 /* FIXME: this isn't an atom! */
6756 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6757 Ity_I32 );
6758 }
6759 return mkU32(0);
6760 }
6761 default:
6762 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6763 ppIRExpr(e);
6764 VG_(tool_panic)("memcheck:schemeE");
6765 }
6766}
6767
sewardjdb5907d2009-11-26 17:20:21 +00006768
sewardj7cf4e6b2008-05-01 20:24:26 +00006769static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6770{
6771 // This is a hacked version of do_shadow_Dirty
sewardj2eecb742012-06-01 16:11:41 +00006772 Int i, k, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00006773 IRAtom *here, *curr;
6774 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00006775
6776 /* First check the guard. */
6777 curr = schemeE( mce, d->guard );
6778
6779 /* Now round up all inputs and maxU32 over them. */
6780
florian434ffae2012-07-19 17:23:42 +00006781 /* Inputs: unmasked args
6782 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj7cf4e6b2008-05-01 20:24:26 +00006783 for (i = 0; d->args[i]; i++) {
sewardj21a5f8c2013-08-08 10:41:46 +00006784 IRAtom* arg = d->args[i];
6785 if ( (d->cee->mcx_mask & (1<<i))
floriana5c3ecb2013-08-15 20:55:42 +00006786 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006787 /* ignore this arg */
6788 } else {
sewardj21a5f8c2013-08-08 10:41:46 +00006789 here = schemeE( mce, arg );
sewardj7cf4e6b2008-05-01 20:24:26 +00006790 curr = gen_maxU32( mce, curr, here );
6791 }
6792 }
6793
6794 /* Inputs: guest state that we read. */
6795 for (i = 0; i < d->nFxState; i++) {
6796 tl_assert(d->fxState[i].fx != Ifx_None);
6797 if (d->fxState[i].fx == Ifx_Write)
6798 continue;
6799
sewardj2eecb742012-06-01 16:11:41 +00006800 /* Enumerate the described state segments */
6801 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6802 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6803 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006804
sewardj2eecb742012-06-01 16:11:41 +00006805 /* Ignore any sections marked as 'always defined'. */
6806 if (isAlwaysDefd(mce, gOff, gSz)) {
6807 if (0)
6808 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6809 gOff, gSz);
6810 continue;
sewardj7cf4e6b2008-05-01 20:24:26 +00006811 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006812
sewardj2eecb742012-06-01 16:11:41 +00006813 /* This state element is read or modified. So we need to
6814 consider it. If larger than 4 bytes, deal with it in
6815 4-byte chunks. */
6816 while (True) {
6817 Int b_offset;
6818 tl_assert(gSz >= 0);
6819 if (gSz == 0) break;
6820 n = gSz <= 4 ? gSz : 4;
6821 /* update 'curr' with maxU32 of the state slice
6822 gOff .. gOff+n-1 */
6823 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6824 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006825 /* Observe the guard expression. If it is false use 0, i.e.
6826 nothing is known about the origin */
6827 IRAtom *cond, *iffalse, *iftrue;
6828
sewardjcc961652013-01-26 11:49:15 +00006829 cond = assignNew( 'B', mce, Ity_I1, d->guard);
florian434ffae2012-07-19 17:23:42 +00006830 iffalse = mkU32(0);
6831 iftrue = assignNew( 'B', mce, Ity_I32,
6832 IRExpr_Get(b_offset
6833 + 2*mce->layout->total_sizeB,
6834 Ity_I32));
6835 here = assignNew( 'B', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006836 IRExpr_ITE(cond, iftrue, iffalse));
sewardj2eecb742012-06-01 16:11:41 +00006837 curr = gen_maxU32( mce, curr, here );
6838 }
6839 gSz -= n;
6840 gOff += n;
6841 }
6842 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006843 }
6844
6845 /* Inputs: memory */
6846
6847 if (d->mFx != Ifx_None) {
6848 /* Because we may do multiple shadow loads/stores from the same
6849 base address, it's best to do a single test of its
6850 definedness right now. Post-instrumentation optimisation
6851 should remove all but this test. */
6852 tl_assert(d->mAddr);
6853 here = schemeE( mce, d->mAddr );
6854 curr = gen_maxU32( mce, curr, here );
6855 }
6856
6857 /* Deal with memory inputs (reads or modifies) */
6858 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006859 toDo = d->mSize;
6860 /* chew off 32-bit chunks. We don't care about the endianness
6861 since it's all going to be condensed down to a single bit,
6862 but nevertheless choose an endianness which is hopefully
6863 native to the platform. */
6864 while (toDo >= 4) {
florian434ffae2012-07-19 17:23:42 +00006865 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6866 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006867 curr = gen_maxU32( mce, curr, here );
6868 toDo -= 4;
6869 }
sewardj8c93fcc2008-10-30 13:08:31 +00006870 /* handle possible 16-bit excess */
6871 while (toDo >= 2) {
florian434ffae2012-07-19 17:23:42 +00006872 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6873 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006874 curr = gen_maxU32( mce, curr, here );
6875 toDo -= 2;
6876 }
floriancda994b2012-06-08 16:01:19 +00006877 /* chew off the remaining 8-bit chunk, if any */
6878 if (toDo == 1) {
florian434ffae2012-07-19 17:23:42 +00006879 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6880 d->guard );
floriancda994b2012-06-08 16:01:19 +00006881 curr = gen_maxU32( mce, curr, here );
6882 toDo -= 1;
6883 }
6884 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006885 }
6886
6887 /* Whew! So curr is a 32-bit B-value which should give an origin
6888 of some use if any of the inputs to the helper are undefined.
6889 Now we need to re-distribute the results to all destinations. */
6890
6891 /* Outputs: the destination temporary, if there is one. */
6892 if (d->tmp != IRTemp_INVALID) {
6893 dst = findShadowTmpB(mce, d->tmp);
6894 assign( 'V', mce, dst, curr );
6895 }
6896
6897 /* Outputs: guest state that we write or modify. */
6898 for (i = 0; i < d->nFxState; i++) {
6899 tl_assert(d->fxState[i].fx != Ifx_None);
6900 if (d->fxState[i].fx == Ifx_Read)
6901 continue;
6902
sewardj2eecb742012-06-01 16:11:41 +00006903 /* Enumerate the described state segments */
6904 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6905 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6906 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006907
sewardj2eecb742012-06-01 16:11:41 +00006908 /* Ignore any sections marked as 'always defined'. */
6909 if (isAlwaysDefd(mce, gOff, gSz))
6910 continue;
6911
6912 /* This state element is written or modified. So we need to
6913 consider it. If larger than 4 bytes, deal with it in
6914 4-byte chunks. */
6915 while (True) {
6916 Int b_offset;
6917 tl_assert(gSz >= 0);
6918 if (gSz == 0) break;
6919 n = gSz <= 4 ? gSz : 4;
6920 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6921 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6922 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006923
florian6c0aa2c2013-01-21 01:27:22 +00006924 /* If the guard expression evaluates to false we simply Put
6925 the value that is already stored in the guest state slot */
6926 IRAtom *cond, *iffalse;
6927
sewardjcc961652013-01-26 11:49:15 +00006928 cond = assignNew('B', mce, Ity_I1,
6929 d->guard);
florian6c0aa2c2013-01-21 01:27:22 +00006930 iffalse = assignNew('B', mce, Ity_I32,
6931 IRExpr_Get(b_offset +
6932 2*mce->layout->total_sizeB,
6933 Ity_I32));
6934 curr = assignNew('V', mce, Ity_I32,
florian5686b2d2013-01-29 03:57:40 +00006935 IRExpr_ITE(cond, curr, iffalse));
florian6c0aa2c2013-01-21 01:27:22 +00006936
sewardj2eecb742012-06-01 16:11:41 +00006937 stmt( 'B', mce, IRStmt_Put(b_offset
florian6c0aa2c2013-01-21 01:27:22 +00006938 + 2*mce->layout->total_sizeB,
sewardj2eecb742012-06-01 16:11:41 +00006939 curr ));
6940 }
6941 gSz -= n;
6942 gOff += n;
sewardj7cf4e6b2008-05-01 20:24:26 +00006943 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006944 }
6945 }
6946
6947 /* Outputs: memory that we write or modify. Same comments about
6948 endianness as above apply. */
6949 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006950 toDo = d->mSize;
6951 /* chew off 32-bit chunks */
6952 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006953 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006954 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006955 toDo -= 4;
6956 }
sewardj8c93fcc2008-10-30 13:08:31 +00006957 /* handle possible 16-bit excess */
6958 while (toDo >= 2) {
sewardjcafe5052013-01-17 14:24:35 +00006959 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6960 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006961 toDo -= 2;
6962 }
floriancda994b2012-06-08 16:01:19 +00006963 /* chew off the remaining 8-bit chunk, if any */
6964 if (toDo == 1) {
6965 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006966 d->guard );
floriancda994b2012-06-08 16:01:19 +00006967 toDo -= 1;
6968 }
6969 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006970 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006971}
6972
sewardjdb5907d2009-11-26 17:20:21 +00006973
sewardjcafe5052013-01-17 14:24:35 +00006974/* Generate IR for origin shadowing for a general guarded store. */
6975static void do_origins_Store_guarded ( MCEnv* mce,
6976 IREndness stEnd,
6977 IRExpr* stAddr,
6978 IRExpr* stData,
6979 IRExpr* guard )
sewardjdb5907d2009-11-26 17:20:21 +00006980{
6981 Int dszB;
6982 IRAtom* dataB;
6983 /* assert that the B value for the address is already available
6984 (somewhere), since the call to schemeE will want to see it.
6985 XXXX how does this actually ensure that?? */
6986 tl_assert(isIRAtom(stAddr));
6987 tl_assert(isIRAtom(stData));
6988 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6989 dataB = schemeE( mce, stData );
sewardjcafe5052013-01-17 14:24:35 +00006990 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
6991}
6992
6993
6994/* Generate IR for origin shadowing for a plain store. */
6995static void do_origins_Store_plain ( MCEnv* mce,
6996 IREndness stEnd,
6997 IRExpr* stAddr,
6998 IRExpr* stData )
6999{
7000 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7001 NULL/*guard*/ );
7002}
7003
7004
7005/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7006
7007static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7008{
7009 do_origins_Store_guarded( mce, sg->end, sg->addr,
7010 sg->data, sg->guard );
7011}
7012
7013static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7014{
7015 IRType loadedTy = Ity_INVALID;
7016 switch (lg->cvt) {
7017 case ILGop_Ident32: loadedTy = Ity_I32; break;
7018 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7019 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7020 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7021 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7022 default: VG_(tool_panic)("schemeS.IRLoadG");
7023 }
7024 IRAtom* ori_alt
7025 = schemeE( mce,lg->alt );
7026 IRAtom* ori_final
7027 = expr2ori_Load_guarded_General(mce, loadedTy,
7028 lg->addr, 0/*addr bias*/,
7029 lg->guard, ori_alt );
7030 /* And finally, bind the origin to the destination temporary. */
7031 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
sewardjdb5907d2009-11-26 17:20:21 +00007032}
7033
7034
sewardj7cf4e6b2008-05-01 20:24:26 +00007035static void schemeS ( MCEnv* mce, IRStmt* st )
7036{
7037 tl_assert(MC_(clo_mc_level) == 3);
7038
7039 switch (st->tag) {
7040
7041 case Ist_AbiHint:
7042 /* The value-check instrumenter handles this - by arranging
7043 to pass the address of the next instruction to
7044 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7045 happen for origin tracking w.r.t. AbiHints. So there is
7046 nothing to do here. */
7047 break;
7048
7049 case Ist_PutI: {
floriand39b0222012-05-31 15:48:13 +00007050 IRPutI *puti = st->Ist.PutI.details;
sewardj7cf4e6b2008-05-01 20:24:26 +00007051 IRRegArray* descr_b;
7052 IRAtom *t1, *t2, *t3, *t4;
floriand39b0222012-05-31 15:48:13 +00007053 IRRegArray* descr = puti->descr;
sewardj7cf4e6b2008-05-01 20:24:26 +00007054 IRType equivIntTy
7055 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7056 /* If this array is unshadowable for whatever reason,
7057 generate no code. */
7058 if (equivIntTy == Ity_INVALID)
7059 break;
7060 tl_assert(sizeofIRType(equivIntTy) >= 4);
7061 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7062 descr_b
7063 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7064 equivIntTy, descr->nElems );
7065 /* Compute a value to Put - the conjoinment of the origin for
7066 the data to be Put-ted (obviously) and of the index value
7067 (not so obviously). */
floriand39b0222012-05-31 15:48:13 +00007068 t1 = schemeE( mce, puti->data );
7069 t2 = schemeE( mce, puti->ix );
sewardj7cf4e6b2008-05-01 20:24:26 +00007070 t3 = gen_maxU32( mce, t1, t2 );
7071 t4 = zWidenFrom32( mce, equivIntTy, t3 );
floriand39b0222012-05-31 15:48:13 +00007072 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
7073 puti->bias, t4) ));
sewardj7cf4e6b2008-05-01 20:24:26 +00007074 break;
7075 }
sewardjdb5907d2009-11-26 17:20:21 +00007076
sewardj7cf4e6b2008-05-01 20:24:26 +00007077 case Ist_Dirty:
7078 do_origins_Dirty( mce, st->Ist.Dirty.details );
7079 break;
sewardjdb5907d2009-11-26 17:20:21 +00007080
7081 case Ist_Store:
sewardjcafe5052013-01-17 14:24:35 +00007082 do_origins_Store_plain( mce, st->Ist.Store.end,
7083 st->Ist.Store.addr,
7084 st->Ist.Store.data );
7085 break;
7086
7087 case Ist_StoreG:
7088 do_origins_StoreG( mce, st->Ist.StoreG.details );
7089 break;
7090
7091 case Ist_LoadG:
7092 do_origins_LoadG( mce, st->Ist.LoadG.details );
sewardjdb5907d2009-11-26 17:20:21 +00007093 break;
7094
7095 case Ist_LLSC: {
7096 /* In short: treat a load-linked like a normal load followed
7097 by an assignment of the loaded (shadow) data the result
7098 temporary. Treat a store-conditional like a normal store,
7099 and mark the result temporary as defined. */
7100 if (st->Ist.LLSC.storedata == NULL) {
7101 /* Load Linked */
7102 IRType resTy
7103 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
7104 IRExpr* vanillaLoad
7105 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
7106 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
7107 || resTy == Ity_I16 || resTy == Ity_I8);
7108 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7109 schemeE(mce, vanillaLoad));
7110 } else {
7111 /* Store conditional */
sewardjcafe5052013-01-17 14:24:35 +00007112 do_origins_Store_plain( mce, st->Ist.LLSC.end,
7113 st->Ist.LLSC.addr,
7114 st->Ist.LLSC.storedata );
sewardjdb5907d2009-11-26 17:20:21 +00007115 /* For the rationale behind this, see comments at the
7116 place where the V-shadow for .result is constructed, in
7117 do_shadow_LLSC. In short, we regard .result as
7118 always-defined. */
7119 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7120 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00007121 }
sewardj7cf4e6b2008-05-01 20:24:26 +00007122 break;
7123 }
sewardjdb5907d2009-11-26 17:20:21 +00007124
sewardj7cf4e6b2008-05-01 20:24:26 +00007125 case Ist_Put: {
7126 Int b_offset
7127 = MC_(get_otrack_shadow_offset)(
7128 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00007129 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00007130 );
7131 if (b_offset >= 0) {
7132 /* FIXME: this isn't an atom! */
7133 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
7134 schemeE( mce, st->Ist.Put.data )) );
7135 }
7136 break;
7137 }
sewardjdb5907d2009-11-26 17:20:21 +00007138
sewardj7cf4e6b2008-05-01 20:24:26 +00007139 case Ist_WrTmp:
7140 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
7141 schemeE(mce, st->Ist.WrTmp.data) );
7142 break;
sewardjdb5907d2009-11-26 17:20:21 +00007143
sewardj7cf4e6b2008-05-01 20:24:26 +00007144 case Ist_MBE:
7145 case Ist_NoOp:
7146 case Ist_Exit:
7147 case Ist_IMark:
7148 break;
sewardjdb5907d2009-11-26 17:20:21 +00007149
sewardj7cf4e6b2008-05-01 20:24:26 +00007150 default:
7151 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
7152 ppIRStmt(st);
7153 VG_(tool_panic)("memcheck:schemeS");
7154 }
7155}
7156
7157
njn25e49d8e72002-09-23 09:36:25 +00007158/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00007159/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00007160/*--------------------------------------------------------------------*/