
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2017 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_hashtable.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_stacktrace.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           Debugging #defines                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           data decls: VtsID                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           data decls: SVal                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)
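
/* Illustrative sketch, not part of the library proper: given the two
   definitions above, the top two bits of an SVal act as a tag
   (SVal_NOACCESS uses tag value 2, SVal_INVALID tag value 3).  The
   hypothetical helper below merely demonstrates testing for the two
   reserved values; the real SVal predicates (SVal__isC etc) appear
   later in this file. */
#if 0
static inline Bool SVal__is_reserved_EXAMPLE ( SVal s ) {
   SVal tag = s & (3ULL << 62);   /* isolate the two tag bits */
   return tag == SVal_NOACCESS || tag == SVal_INVALID;
}
#endif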



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           data decls: ScalarTS              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46-bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.06e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 27 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  27 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   into its 'tsw' field not only a ThrID but also at least 4+1 other
   bits (access size and writeness) in a UInt, hence limiting the
   ThrID size to 32-(4+1) == 27.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).
   So, SCALARTS_N_THRBITS must be 11 or more.
   See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 27 inclusive;
                                  see NB1 and NB2 above. */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
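
/* Usage sketch, illustrative only: a ScalarTS is built by ordinary
   bitfield assignment, and the 18/46 split means the (ThrID, tym)
   pair really does fit in one 64-bit word.  The thrid value used here
   is hypothetical, but respects NB2 above (issued upwards from 1024). */
#if 0
static ScalarTS ScalarTS_mk_EXAMPLE ( void ) {
   ScalarTS st;
   st.thrid = 1024;   /* first valid ThrID, per NB2 */
   st.tym   = 1;      /* first timestamp tick */
   tl_assert(sizeof(ScalarTS) == 8);   /* the whole point of the packing */
   return st;
}
#endif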



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           data decls: Filter                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )
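
/* Worked example for the two macros above: with FI_LINE_SZB_LOG2 == 5
   a line covers 32 bytes, so for the (hypothetical) address 0x100237:
      FI_GET_TAG(0x100237)    == 0x100220   (low 5 bits cleared)
      FI_GET_LINENO(0x100237) == (0x100237 >> 5) & 0x3FF == 0x11
   i.e. the address falls in the 32-byte line based at 0x100220, which
   maps to filter line number 0x11. */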


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
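
/* Illustrative sketch, not used by the implementation: following the
   bit-pair layout documented above, the R and W bits for byte 'i'
   (0 .. 7) covered by one of the UShorts can be extracted like this. */
#if 0
static void FiLine_get_RW_EXAMPLE ( UShort u16, UWord i,
                                    /*OUT*/Bool* rP, /*OUT*/Bool* wP )
{
   tl_assert(i <= 7);
   *rP = (u16 >> (2*i + 1)) & 1;   /* higher bit of the pair: R seen */
   *wP = (u16 >> (2*i + 0)) & 1;   /* lower bit of the pair: W seen */
}
#endif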



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                  data decls: Thr, ULong_n_EC                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           data decls: SO                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                           Forward declarations              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Forward declarations for globals needed by other parts of the
   library.  These are set once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );
/* SVals in LineZ are used to store various pointers. */
static inline void *SVal2Ptr (SVal s);
static inline SVal  Ptr2SVal (void* ptr);

/* A doubly linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                   SECTION BEGIN compressed shadow memory    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals stored
   herein.  It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered to
   be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range       ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range      ( Addr, Addr, SizeT );
static void zsm_flush_cache      ( void );

#endif /* ! __HB_ZSM_H */
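
/* Usage sketch for the above (illustrative only): a caller wanting
   exact reference counts would do

      zsm_flush_cache();
      // at this point, any SVal whose caller-observed refcount is
      // zero is genuinely unreferenced by this library

   At any other moment the counts may be transiently over- or
   under-stated, for the caching reasons explained above. */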


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
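
/* Worked example: ROUNDUP(13, 8) == 16 and ROUNDDN(13, 8) == 8, while
   both leave exact multiples (eg 16) unchanged. */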

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}
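
/* Example of why the single comparison suffices: with start == 0x1000
   and szB == 0x100, the out-of-range address a == 0xFFF gives
   a - start == (Addr)-1, a huge unsigned value, which correctly fails
   the 'a - start < szB' test. */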

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
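
/* Two concrete descr values, to make the encoding above vivid: a tree
   holding eight separate byte-sized values has descr
   (TREE_DESCR_8_7 | ... | TREE_DESCR_8_0) == 0x7F80, while a tree
   holding a single 64-bit value for all 8 bytes has descr
   TREE_DESCR_64 == 0x0008. */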

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

/* LineZ.dict[1] is used to store various pointers:
   * In the first lineZ of a free SecMap, it points to the next free SecMap.
   * In a lineZ for which we need to use a lineF, it points to the lineF. */
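/* Illustrative sketch, not used by the implementation: reading the
   SVal for byte 'ix' of a LineZ holding a direct (non-F) rep goes
   through the 2-bit dictionary-index array, along these lines. */
#if 0
static SVal LineZ_read_EXAMPLE ( LineZ* lineZ, UWord ix ) {
   UWord dix;
   tl_assert(lineZ->dict[0] != SVal_INVALID);  /* must be a Z rep */
   tl_assert(ix < N_LINE_ARANGE);
   /* each UChar of ix2s packs four 2-bit dict indexes */
   dix = (lineZ->ix2s[ix >> 2] >> (2 * (ix & 3))) & 3;
   return lineZ->dict[dix];
}
#endif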

typedef
   struct {
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* We use a pool allocator for LineF, as LineF is relatively small,
   and we will often alloc/release such lines. */
static PoolAlloc* LineF_pool_allocator;

/* SVals in a lineZ are used to store various pointers.
   Below are conversion functions to support that. */
static inline LineF *LineF_Ptr (LineZ *lineZ)
{
   tl_assert(lineZ->dict[0] == SVal_INVALID);
   return SVal2Ptr (lineZ->dict[1]);
}

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) a pointer to a LineF
   that holds the full representation.

   As each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to a lineF should equal
   the number of lineFs in use.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use

   Hence the following actions at the following transitions are required:

   F rep: alloc'd -> freed                     -- rcdec_LineF
   F rep:         -> alloc'd                   -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID  -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other  -- rcinc_LineZ
*/

typedef
   struct {
      UInt  magic;
      LineZ linesZ[N_SECMAP_ZLINES];
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not writeback, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises    = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s      = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s      = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s      = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s      = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s     = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s     = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s     = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s     = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s      = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s     = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s     = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s     = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s     = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s      = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits  = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits  = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits   = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
static UWord stats__vts_pruning          = 0; // # nr of vts pruning

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become fully SVal_NOACCESS are inserted in a list
   of recycled SecMaps.  When a new SecMap is needed, a recycled
   SecMap will be used in preference to allocating a new SecMap. */
/* We make a linked list of SecMaps.  The first LineZ is re-used to
   implement the linked list. */
/* Returns the SecMap following sm in the free list.
   NULL if sm is the last SecMap.  sm must be on the free list. */
static inline SecMap *SecMap_freelist_next ( SecMap* sm )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   return SVal2Ptr (sm->linesZ[0].dict[1]);
}
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   sm->linesZ[0].dict[1] = Ptr2SVal (next);
}

static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = SecMap_freelist_next (sm);
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, SecMap_freelist);
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = SecMap_freelist_next (sm);
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count those that can be GC-ed.
   If 'really' is True, actually GC them. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
            We just need to free the lineFs referenced by the linesZ. */
         if (n_linesF > 0) {
            for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
               LineZ* lineZ = &sm->linesZ[i];
               if (lineZ->dict[0] == SVal_INVALID) {
                  VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                  n_linesF--;
               }
            }
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  We then allow 10% more to be
         allocated before GCing again.  This avoids doing a lot of
         costly GC for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
            perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}

/* Returns the nr of linesF which are in use.  Note: this is scanning
   the secmap wordFM.  So, this is to be used for statistics only. */
__attribute__((noinline))
static UWord shmem__SecMap_used_linesF(void)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord inUse = 0;

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);

      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] == SVal_INVALID)
            inUse++;
      }
   }
   VG_(doneIterFM)( map_shmem );

   return inUse;
}

/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
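
/* These two helpers read and write one 2-bit field of a packed array
   such as LineZ.ix2s.  For example, write_twobit_array(lineZ->ix2s, i, j)
   records that byte i of the line takes its value from lineZ->dict[j],
   and read_twobit_array(lineZ->ix2s, i) recovers j again; the round
   trip holds because only the addressed 2-bit field is modified. */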

/* We cache one free lineF, to avoid pool allocator calls.
   Measurement on firefox has shown that this avoids more than 90%
   of the PA calls. */
static LineF *free_lineF = NULL;

/* Allocates a lineF for LineZ.  Sets lineZ in a state indicating
   lineF has to be used. */
static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
{
   LineF *lineF;

   tl_assert(lineZ->dict[0] == SVal_INVALID);

   if (LIKELY(free_lineF)) {
      lineF = free_lineF;
      free_lineF = NULL;
   } else {
      lineF = VG_(allocEltPA) ( LineF_pool_allocator );
   }
   lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   lineZ->dict[1] = Ptr2SVal (lineF);

   return lineF;
}

/* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
   back to its initial state SVal_NOACCESS (i.e. ready to be
   read or written just after SecMap allocation). */
static inline void clear_LineF_of_Z (LineZ *lineZ)
{
   LineF *lineF = LineF_Ptr(lineZ);

   rcdec_LineF(lineF);
   if (UNLIKELY(free_lineF)) {
      VG_(freeEltPA)( LineF_pool_allocator, lineF );
   } else {
      free_lineF = lineF;
   }
   lineZ->dict[0] = SVal_NOACCESS;
   lineZ->dict[1] = SVal_INVALID;
}

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      lineF = LineF_Ptr (lineZ);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation, in
   recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   /* re RCs: new data is about to be parked in this LineZ, so we must
      rcdec its current representation: rcdec_LineZ for a Z rep, or
      clear_LineF_of_Z (which rcdecs and frees the LineF) for an F rep. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID)
      clear_LineF_of_Z(lineZ);
   else
      rcdec_LineZ(lineZ);
   *smp  = sm;
   *zixp = zix;
}

/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}
1400
1401
1402typedef struct { UChar count; SVal sval; } CountedSVal;
1403
1404static
1405void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1406 /*OUT*/Word* dstUsedP,
1407 Word nDst, CacheLine* src )
1408{
1409 Word tno, cloff, dstUsed;
1410
1411 tl_assert(nDst == N_LINE_ARANGE);
1412 dstUsed = 0;
1413
1414 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1415 UShort descr = src->descrs[tno];
1416 SVal* tree = &src->svals[cloff];
1417
1418 /* sequentialise the tree described by (descr,tree). */
1419# define PUT(_n,_v) \
1420 do { dst[dstUsed ].count = (_n); \
1421 dst[dstUsed++].sval = (_v); \
1422 } while (0)
1423
1424 /* byte 0 */
1425 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1426 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1427 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1428 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1429 /* byte 1 */
1430 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1431 /* byte 2 */
1432 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1433 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1434 /* byte 3 */
1435 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1436 /* byte 4 */
1437 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1438 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1439 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1440 /* byte 5 */
1441 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1442 /* byte 6 */
1443 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1444 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1445 /* byte 7 */
1446 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1447
1448# undef PUT
1449 /* END sequentialise the tree described by (descr,tree). */
1450
1451 }
1452 tl_assert(cloff == N_LINE_ARANGE);
1453 tl_assert(dstUsed <= nDst);
1454
1455 *dstUsedP = dstUsed;
1456}
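
/* Continuing the example above (illustrative only): a tree with
   descr = TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6
   | TREE_DESCR_8_7 is emitted as the four runs (4,tree[0]),
   (2,tree[4]), (1,tree[6]) and (1,tree[7]), which together cover
   exactly 8 bytes, as required. */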
1457
1458/* Write the cacheline 'wix' to backing store. Where it ends up
1459 is determined by its tag field. */
1460static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1461{
1462 Word i, j, k, m;
1463 Addr tag;
1464 SecMap* sm;
1465 CacheLine* cl;
1466 LineZ* lineZ;
1467 LineF* lineF;
1468 Word zix, fix, csvalsUsed;
1469 CountedSVal csvals[N_LINE_ARANGE];
1470 SVal sv;
1471
1472 if (0)
1473 VG_(printf)("scache wback line %d\n", (Int)wix);
1474
1475 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1476
1477 tag = cache_shmem.tags0[wix];
1478 cl = &cache_shmem.lyns0[wix];
1479
1480 /* The cache line may have been invalidated; if so, ignore it. */
1481 if (!is_valid_scache_tag(tag))
1482 return;
1483
1484 /* Where are we going to put it? */
1485 sm = NULL;
1486 lineZ = NULL;
1487 lineF = NULL;
1488 zix = fix = -1;
1489
1490 /* find the Z line to write in and rcdec it or the associated F
1491 line. */
1492 find_Z_for_writing( &sm, &zix, tag );
1493
1494 tl_assert(sm);
1495 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1496 lineZ = &sm->linesZ[zix];
1497
1498 /* Generate the data to be stored */
sewardj8f5374e2008-12-07 11:40:17 +00001499 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001500 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1501
1502 csvalsUsed = -1;
1503 sequentialise_CacheLine( csvals, &csvalsUsed,
1504 N_LINE_ARANGE, cl );
1505 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
florian5e5cb002015-08-03 21:21:42 +00001506 if (0) VG_(printf)("%ld ", csvalsUsed);
sewardjf98e1c02008-10-25 16:22:41 +00001507
1508 lineZ->dict[0] = lineZ->dict[1]
1509 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1510
1511 /* i indexes actual shadow values, k is cursor in csvals */
1512 i = 0;
1513 for (k = 0; k < csvalsUsed; k++) {
1514
1515 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001516 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001517 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1518 /* do we already have it? */
1519 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1520 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1521 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1522 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1523 /* no. look for a free slot. */
sewardj8f5374e2008-12-07 11:40:17 +00001524 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001525 tl_assert(sv != SVal_INVALID);
1526 if (lineZ->dict[0]
1527 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1528 if (lineZ->dict[1]
1529 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1530 if (lineZ->dict[2]
1531 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1532 if (lineZ->dict[3]
1533 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1534 break; /* we'll have to use the f rep */
1535 dict_ok:
1536 m = csvals[k].count;
1537 if (m == 8) {
1538 write_twobit_array( lineZ->ix2s, i+0, j );
1539 write_twobit_array( lineZ->ix2s, i+1, j );
1540 write_twobit_array( lineZ->ix2s, i+2, j );
1541 write_twobit_array( lineZ->ix2s, i+3, j );
1542 write_twobit_array( lineZ->ix2s, i+4, j );
1543 write_twobit_array( lineZ->ix2s, i+5, j );
1544 write_twobit_array( lineZ->ix2s, i+6, j );
1545 write_twobit_array( lineZ->ix2s, i+7, j );
1546 i += 8;
1547 }
1548 else if (m == 4) {
1549 write_twobit_array( lineZ->ix2s, i+0, j );
1550 write_twobit_array( lineZ->ix2s, i+1, j );
1551 write_twobit_array( lineZ->ix2s, i+2, j );
1552 write_twobit_array( lineZ->ix2s, i+3, j );
1553 i += 4;
1554 }
1555 else if (m == 1) {
1556 write_twobit_array( lineZ->ix2s, i+0, j );
1557 i += 1;
1558 }
1559 else if (m == 2) {
1560 write_twobit_array( lineZ->ix2s, i+0, j );
1561 write_twobit_array( lineZ->ix2s, i+1, j );
1562 i += 2;
1563 }
1564 else {
1565 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1566 }
1567
1568 }
1569
1570 if (LIKELY(i == N_LINE_ARANGE)) {
1571 /* Construction of the compressed representation was
1572 successful. */
1573 rcinc_LineZ(lineZ);
1574 stats__cache_Z_wbacks++;
1575 } else {
1576 /* Cannot use the compressed(z) representation. Use the full(f)
1577 rep instead. */
1578 tl_assert(i >= 0 && i < N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00001579 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
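      /* NB: dict[1] is deliberately not cleared here.  With dict[0]
         set to SVal_INVALID to mark the F representation,
         alloc_LineF_for_Z below presumably records the associated
         LineF via dict[1] (see its definition earlier in this
         file). */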
philippe71ed3c92015-05-17 19:32:42 +00001580 lineF = alloc_LineF_for_Z (lineZ);
sewardjf98e1c02008-10-25 16:22:41 +00001581 i = 0;
1582 for (k = 0; k < csvalsUsed; k++) {
sewardj8f5374e2008-12-07 11:40:17 +00001583 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001584 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1585 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001586 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001587 tl_assert(sv != SVal_INVALID);
1588 for (m = csvals[k].count; m > 0; m--) {
1589 lineF->w64s[i] = sv;
1590 i++;
1591 }
1592 }
1593 tl_assert(i == N_LINE_ARANGE);
1594 rcinc_LineF(lineF);
1595 stats__cache_F_wbacks++;
1596 }
sewardjf98e1c02008-10-25 16:22:41 +00001597}
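
/* A sketch of the two representations (field sizes as defined by
   LineZ/LineF earlier in this file): a line whose sequentialisation
   yields at most 4 distinct shadow values, say just X and Y, is
   stored compressed as dict = { X, Y, INVALID, INVALID } plus one
   2-bit dictionary index per byte of the line.  Only when a fifth
   distinct value turns up does the loop above break out and fall
   back to the uncompressed LineF, which holds one full SVal per
   byte. */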
1598
1599/* Fetch the cacheline 'wix' from the backing store. The tag
1600 associated with 'wix' is assumed to have already been filled in;
1601 hence that is used to determine where in the backing store to read
1602 from. */
1603static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1604{
1605 Word i;
1606 Addr tag;
1607 CacheLine* cl;
1608 LineZ* lineZ;
1609 LineF* lineF;
1610
1611 if (0)
1612 VG_(printf)("scache fetch line %d\n", (Int)wix);
1613
1614 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1615
1616 tag = cache_shmem.tags0[wix];
1617 cl = &cache_shmem.lyns0[wix];
1618
1619 /* reject nonsense requests */
1620 tl_assert(is_valid_scache_tag(tag));
1621
1622 lineZ = NULL;
1623 lineF = NULL;
1624 find_ZF_for_reading( &lineZ, &lineF, tag );
1625 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1626
1627 /* expand the data into the bottom layer of the tree, then get
 1628 normalise_CacheLine to build the descriptor array. */
1629 if (lineF) {
sewardjf98e1c02008-10-25 16:22:41 +00001630 for (i = 0; i < N_LINE_ARANGE; i++) {
1631 cl->svals[i] = lineF->w64s[i];
1632 }
1633 stats__cache_F_fetches++;
1634 } else {
1635 for (i = 0; i < N_LINE_ARANGE; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00001636 UWord ix = read_twobit_array( lineZ->ix2s, i );
philippe1475a7f2015-05-11 19:45:08 +00001637 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1638 cl->svals[i] = lineZ->dict[ix];
1639 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00001640 }
1641 stats__cache_Z_fetches++;
1642 }
1643 normalise_CacheLine( cl );
1644}
1645
philippe8939e092015-05-11 20:18:10 +00001646/* Invalidate the cachelines corresponding to the given range, which
1647 must start and end on a cacheline boundary. */
philippef54cb662015-05-10 22:19:31 +00001648static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1649{
philippef54cb662015-05-10 22:19:31 +00001650 Word wix;
1651
philippe8939e092015-05-11 20:18:10 +00001652 /* ga must be on a cacheline boundary. */
1653 tl_assert (is_valid_scache_tag (ga));
1654 /* szB must be a multiple of cacheline size. */
1655 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1656
1657
philippef54cb662015-05-10 22:19:31 +00001658 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1659 Word nwix = szB / N_LINE_ARANGE;
1660
1661 if (nwix > N_WAY_NENT)
 1662 nwix = N_WAY_NENT; // no need to check the same entry several times.
1663
1664 for (wix = 0; wix < nwix; wix++) {
1665 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1666 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1667 ga_ix++;
philippe364f0bb2015-05-15 09:38:54 +00001668 if (UNLIKELY(ga_ix == N_WAY_NENT))
philippef54cb662015-05-10 22:19:31 +00001669 ga_ix = 0;
1670 }
sewardjf98e1c02008-10-25 16:22:41 +00001671}
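
/* Note the scan visits at most N_WAY_NENT slots: the cache is
   direct-mapped, so a range covering more than N_WAY_NENT lines
   necessarily revisits slots, and clamping nwix therefore loses
   nothing. */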
1672
philippef54cb662015-05-10 22:19:31 +00001673
sewardjf98e1c02008-10-25 16:22:41 +00001674static void shmem__flush_and_invalidate_scache ( void ) {
1675 Word wix;
1676 Addr tag;
1677 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1678 tl_assert(!is_valid_scache_tag(1));
1679 for (wix = 0; wix < N_WAY_NENT; wix++) {
1680 tag = cache_shmem.tags0[wix];
1681 if (tag == 1/*INVALID*/) {
1682 /* already invalid; nothing to do */
1683 } else {
1684 tl_assert(is_valid_scache_tag(tag));
1685 cacheline_wback( wix );
1686 }
1687 cache_shmem.tags0[wix] = 1/*INVALID*/;
1688 }
philippef54cb662015-05-10 22:19:31 +00001689 stats__cache_flushes_invals++;
sewardjf98e1c02008-10-25 16:22:41 +00001690}
1691
1692
1693static inline Bool aligned16 ( Addr a ) {
1694 return 0 == (a & 1);
1695}
1696static inline Bool aligned32 ( Addr a ) {
1697 return 0 == (a & 3);
1698}
1699static inline Bool aligned64 ( Addr a ) {
1700 return 0 == (a & 7);
1701}
1702static inline UWord get_cacheline_offset ( Addr a ) {
1703 return (UWord)(a & (N_LINE_ARANGE - 1));
1704}
1705static inline Addr cacheline_ROUNDUP ( Addr a ) {
1706 return ROUNDUP(a, N_LINE_ARANGE);
1707}
1708static inline Addr cacheline_ROUNDDN ( Addr a ) {
1709 return ROUNDDN(a, N_LINE_ARANGE);
1710}
1711static inline UWord get_treeno ( Addr a ) {
1712 return get_cacheline_offset(a) >> 3;
1713}
1714static inline UWord get_tree_offset ( Addr a ) {
1715 return a & 7;
1716}
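
/* Example of the decomposition (assuming, per the definitions earlier
   in this file, N_LINE_ARANGE == 64 and hence N_LINE_BITS == 6): for
   a == 0x1002A,
      get_cacheline_offset(a) == 0x2A == 42,
      get_treeno(a) == 42 >> 3 == 5 (the 6th 8-byte tree),
      get_tree_offset(a) == 2 (byte 2 within that tree). */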
1717
1718static __attribute__((noinline))
1719 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1720static inline CacheLine* get_cacheline ( Addr a )
1721{
1722 /* tag is 'a' with the in-line offset masked out,
1723 eg a[31]..a[4] 0000 */
1724 Addr tag = a & ~(N_LINE_ARANGE - 1);
1725 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1726 stats__cache_totrefs++;
1727 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1728 return &cache_shmem.lyns0[wix];
1729 } else {
1730 return get_cacheline_MISS( a );
1731 }
1732}
1733
1734static __attribute__((noinline))
1735 CacheLine* get_cacheline_MISS ( Addr a )
1736{
1737 /* tag is 'a' with the in-line offset masked out,
1738 eg a[31]..a[4] 0000 */
1739
1740 CacheLine* cl;
1741 Addr* tag_old_p;
1742 Addr tag = a & ~(N_LINE_ARANGE - 1);
1743 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1744
1745 tl_assert(tag != cache_shmem.tags0[wix]);
1746
1747 /* Dump the old line into the backing store. */
1748 stats__cache_totmisses++;
1749
1750 cl = &cache_shmem.lyns0[wix];
1751 tag_old_p = &cache_shmem.tags0[wix];
1752
1753 if (is_valid_scache_tag( *tag_old_p )) {
1754 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001755 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001756 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1757 cacheline_wback( wix );
1758 }
1759 /* and reload the new one */
1760 *tag_old_p = tag;
1761 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001762 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001763 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1764 return cl;
1765}
1766
1767static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1768 stats__cline_64to32pulldown++;
1769 switch (toff) {
1770 case 0: case 4:
1771 tl_assert(descr & TREE_DESCR_64);
1772 tree[4] = tree[0];
1773 descr &= ~TREE_DESCR_64;
1774 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1775 break;
1776 default:
1777 tl_assert(0);
1778 }
1779 return descr;
1780}
1781
1782static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1783 stats__cline_32to16pulldown++;
1784 switch (toff) {
1785 case 0: case 2:
1786 if (!(descr & TREE_DESCR_32_0)) {
1787 descr = pulldown_to_32(tree, 0, descr);
1788 }
1789 tl_assert(descr & TREE_DESCR_32_0);
1790 tree[2] = tree[0];
1791 descr &= ~TREE_DESCR_32_0;
1792 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1793 break;
1794 case 4: case 6:
1795 if (!(descr & TREE_DESCR_32_1)) {
1796 descr = pulldown_to_32(tree, 4, descr);
1797 }
1798 tl_assert(descr & TREE_DESCR_32_1);
1799 tree[6] = tree[4];
1800 descr &= ~TREE_DESCR_32_1;
1801 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1802 break;
1803 default:
1804 tl_assert(0);
1805 }
1806 return descr;
1807}
1808
1809static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1810 stats__cline_16to8pulldown++;
1811 switch (toff) {
1812 case 0: case 1:
1813 if (!(descr & TREE_DESCR_16_0)) {
1814 descr = pulldown_to_16(tree, 0, descr);
1815 }
1816 tl_assert(descr & TREE_DESCR_16_0);
1817 tree[1] = tree[0];
1818 descr &= ~TREE_DESCR_16_0;
1819 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1820 break;
1821 case 2: case 3:
1822 if (!(descr & TREE_DESCR_16_1)) {
1823 descr = pulldown_to_16(tree, 2, descr);
1824 }
1825 tl_assert(descr & TREE_DESCR_16_1);
1826 tree[3] = tree[2];
1827 descr &= ~TREE_DESCR_16_1;
1828 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1829 break;
1830 case 4: case 5:
1831 if (!(descr & TREE_DESCR_16_2)) {
1832 descr = pulldown_to_16(tree, 4, descr);
1833 }
1834 tl_assert(descr & TREE_DESCR_16_2);
1835 tree[5] = tree[4];
1836 descr &= ~TREE_DESCR_16_2;
1837 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1838 break;
1839 case 6: case 7:
1840 if (!(descr & TREE_DESCR_16_3)) {
1841 descr = pulldown_to_16(tree, 6, descr);
1842 }
1843 tl_assert(descr & TREE_DESCR_16_3);
1844 tree[7] = tree[6];
1845 descr &= ~TREE_DESCR_16_3;
1846 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1847 break;
1848 default:
1849 tl_assert(0);
1850 }
1851 return descr;
1852}
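
/* Example (illustrative only): to expose an 8-bit leaf at toff 3 in
   a tree currently described by TREE_DESCR_64, pulldown_to_8 first
   recurses: pulldown_to_32 copies tree[0] to tree[4]
   (64 -> 32_1|32_0), pulldown_to_16 copies tree[0] to tree[2]
   (32_0 -> 16_1|16_0), and finally tree[3] = tree[2]
   (16_1 -> 8_3|8_2), leaving
      descr = TREE_DESCR_32_1 | TREE_DESCR_16_0
            | TREE_DESCR_8_3 | TREE_DESCR_8_2. */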
1853
1854
1855static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1856 UShort mask;
1857 switch (toff) {
1858 case 0:
1859 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1860 tl_assert( (descr & mask) == mask );
1861 descr &= ~mask;
1862 descr |= TREE_DESCR_16_0;
1863 break;
1864 case 2:
1865 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1866 tl_assert( (descr & mask) == mask );
1867 descr &= ~mask;
1868 descr |= TREE_DESCR_16_1;
1869 break;
1870 case 4:
1871 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1872 tl_assert( (descr & mask) == mask );
1873 descr &= ~mask;
1874 descr |= TREE_DESCR_16_2;
1875 break;
1876 case 6:
1877 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1878 tl_assert( (descr & mask) == mask );
1879 descr &= ~mask;
1880 descr |= TREE_DESCR_16_3;
1881 break;
1882 default:
1883 tl_assert(0);
1884 }
1885 return descr;
1886}
1887
1888static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1889 UShort mask;
1890 switch (toff) {
1891 case 0:
1892 if (!(descr & TREE_DESCR_16_0))
1893 descr = pullup_descr_to_16(descr, 0);
1894 if (!(descr & TREE_DESCR_16_1))
1895 descr = pullup_descr_to_16(descr, 2);
1896 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1897 tl_assert( (descr & mask) == mask );
1898 descr &= ~mask;
1899 descr |= TREE_DESCR_32_0;
1900 break;
1901 case 4:
1902 if (!(descr & TREE_DESCR_16_2))
1903 descr = pullup_descr_to_16(descr, 4);
1904 if (!(descr & TREE_DESCR_16_3))
1905 descr = pullup_descr_to_16(descr, 6);
1906 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1907 tl_assert( (descr & mask) == mask );
1908 descr &= ~mask;
1909 descr |= TREE_DESCR_32_1;
1910 break;
1911 default:
1912 tl_assert(0);
1913 }
1914 return descr;
1915}
1916
1917static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1918 switch (toff) {
1919 case 0: case 4:
1920 return 0 != (descr & TREE_DESCR_64);
1921 default:
1922 tl_assert(0);
1923 }
1924}
1925
1926static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1927 switch (toff) {
1928 case 0:
1929 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1930 case 2:
1931 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1932 case 4:
1933 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1934 case 6:
1935 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1936 default:
1937 tl_assert(0);
1938 }
1939}
1940
1941/* ------------ Cache management ------------ */
1942
1943static void zsm_flush_cache ( void )
1944{
1945 shmem__flush_and_invalidate_scache();
1946}
1947
1948
philippe1475a7f2015-05-11 19:45:08 +00001949static void zsm_init ( void )
sewardjf98e1c02008-10-25 16:22:41 +00001950{
1951 tl_assert( sizeof(UWord) == sizeof(Addr) );
1952
sewardjf98e1c02008-10-25 16:22:41 +00001953 tl_assert(map_shmem == NULL);
1954 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1955 HG_(free),
1956 NULL/*unboxed UWord cmp*/);
philippef54cb662015-05-10 22:19:31 +00001957 /* Invalidate all cache entries. */
1958 tl_assert(!is_valid_scache_tag(1));
1959 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1960 cache_shmem.tags0[wix] = 1/*INVALID*/;
1961 }
sewardjf98e1c02008-10-25 16:22:41 +00001962
philippe71ed3c92015-05-17 19:32:42 +00001963 LineF_pool_allocator = VG_(newPA) (
1964 sizeof(LineF),
1965 /* Nr elements/pool to fill a core arena block
1966 taking some arena overhead into account. */
1967 (4 * 1024 * 1024 - 200)/sizeof(LineF),
1968 HG_(zalloc),
1969 "libhb.LineF_storage.pool",
1970 HG_(free)
1971 );
1972
sewardjf98e1c02008-10-25 16:22:41 +00001973 /* a SecMap must contain an integral number of CacheLines */
1974 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1975 /* also ... a CacheLine holds an integral number of trees */
1976 tl_assert(0 == (N_LINE_ARANGE % 8));
1977}
1978
1979/////////////////////////////////////////////////////////////////
1980/////////////////////////////////////////////////////////////////
1981// //
1982// SECTION END compressed shadow memory //
1983// //
1984/////////////////////////////////////////////////////////////////
1985/////////////////////////////////////////////////////////////////
1986
1987
1988
1989/////////////////////////////////////////////////////////////////
1990/////////////////////////////////////////////////////////////////
1991// //
1992// SECTION BEGIN vts primitives //
1993// //
1994/////////////////////////////////////////////////////////////////
1995/////////////////////////////////////////////////////////////////
1996
sewardjf98e1c02008-10-25 16:22:41 +00001997
sewardje4cce742011-02-24 15:25:24 +00001998/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1999 being compact stand-ins for Thr*'s. Use these functions to map
2000 between them. */
2001static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
2002static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
2003
sewardje4cce742011-02-24 15:25:24 +00002004__attribute__((noreturn))
2005static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
2006{
2007 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00002008 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00002009 "\n"
2010 "Helgrind: cannot continue, run aborted: too many threads.\n"
2011 "Sorry. Helgrind can only handle programs that create\n"
2012 "%'llu or fewer threads over their entire lifetime.\n"
2013 "\n";
sewardj03e7d272011-05-04 09:08:34 +00002014 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00002015 } else {
florian6bf37262012-10-21 03:23:36 +00002016 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00002017 "\n"
2018 "Helgrind: cannot continue, run aborted: too many\n"
2019 "synchronisation events. Sorry. Helgrind can only handle\n"
2020 "programs which perform %'llu or fewer\n"
2021 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2022 "\n";
2023 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2024 }
2025 VG_(exit)(1);
2026 /*NOTREACHED*/
2027 tl_assert(0); /*wtf?!*/
2028}
2029
2030
philippec3508652015-03-28 12:01:58 +00002031/* The dead thread (ThrID, actually) tables. A thread may only be
sewardjffce8152011-06-24 10:09:41 +00002032 listed here if we have been notified thereof by libhb_async_exit.
2033 New entries are added at the end. The order isn't important, but
philippec3508652015-03-28 12:01:58 +00002034 the ThrID values must be unique.
2035 verydead_thread_table_not_pruned lists the identity of the threads
2036 that died since the previous round of pruning.
2037 Once pruning is done, these ThrID are added in verydead_thread_table.
2038 We don't actually need to keep the set of threads that have ever died --
sewardjffce8152011-06-24 10:09:41 +00002039 only the threads that have died since the previous round of
2040 pruning. But it's useful for sanity check purposes to keep the
2041 entire set, so we do. */
philippec3508652015-03-28 12:01:58 +00002042static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
sewardjffce8152011-06-24 10:09:41 +00002043static XArray* /* of ThrID */ verydead_thread_table = NULL;
2044
2045/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00002046static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2047 ThrID id1 = *(const ThrID*)v1;
2048 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00002049 if (id1 < id2) return -1;
2050 if (id1 > id2) return 1;
2051 return 0;
2052}
2053
philippec3508652015-03-28 12:01:58 +00002054static void verydead_thread_tables_init ( void )
sewardjffce8152011-06-24 10:09:41 +00002055{
2056 tl_assert(!verydead_thread_table);
philippec3508652015-03-28 12:01:58 +00002057 tl_assert(!verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002058 verydead_thread_table
2059 = VG_(newXA)( HG_(zalloc),
2060 "libhb.verydead_thread_table_init.1",
2061 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00002062 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
philippec3508652015-03-28 12:01:58 +00002063 verydead_thread_table_not_pruned
2064 = VG_(newXA)( HG_(zalloc),
2065 "libhb.verydead_thread_table_init.2",
2066 HG_(free), sizeof(ThrID) );
2067 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
sewardjffce8152011-06-24 10:09:41 +00002068}
2069
philippec3508652015-03-28 12:01:58 +00002070static void verydead_thread_table_sort_and_check (XArray* thrids)
2071{
2072 UWord i;
2073
2074 VG_(sortXA)( thrids );
2075 /* Sanity check: check for unique .sts.thr values. */
2076 UWord nBT = VG_(sizeXA)( thrids );
2077 if (nBT > 0) {
2078 ThrID thrid1, thrid2;
2079 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2080 for (i = 1; i < nBT; i++) {
2081 thrid1 = thrid2;
2082 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2083 tl_assert(thrid1 < thrid2);
2084 }
2085 }
2086 /* Ok, so the dead thread table thrids has unique and in-order keys. */
2087}
sewardjf98e1c02008-10-25 16:22:41 +00002088
2089/* A VTS contains .ts, its vector clock, and also .id, a field to hold
2090 a backlink for the caller's convenience. Since we have no idea
2091 what to set that to in the library, it always gets set to
2092 VtsID_INVALID. */
2093typedef
2094 struct {
sewardj7aa38a92011-02-27 23:04:12 +00002095 VtsID id;
2096 UInt usedTS;
2097 UInt sizeTS;
2098 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00002099 }
2100 VTS;
2101
sewardj7aa38a92011-02-27 23:04:12 +00002102/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00002103static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00002104
sewardjffce8152011-06-24 10:09:41 +00002105/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00002106 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00002107static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002108
sewardjffce8152011-06-24 10:09:41 +00002109/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2110 array is sized exactly to hold the number of required elements.
2111 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2112 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00002113static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00002114
sewardjf98e1c02008-10-25 16:22:41 +00002115/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00002116static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002117
sewardj7aa38a92011-02-27 23:04:12 +00002118/* Create a new singleton VTS in 'out'. Caller must have
2119 pre-allocated 'out' sufficiently big to hold the result in all
2120 possible cases. */
2121static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00002122
sewardj7aa38a92011-02-27 23:04:12 +00002123/* Create in 'out' a VTS which is the same as 'vts' except with
2124 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2125 sufficiently big to hold the result in all possible cases. */
2126static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002127
sewardj7aa38a92011-02-27 23:04:12 +00002128/* Create in 'out' a VTS which is the join (max) of 'a' and
2129 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2130 the result in all possible cases. */
2131static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002132
sewardj23f12002009-07-24 08:45:08 +00002133/* Compute the partial ordering relation of the two args. Although we
2134 could be completely general and return an enumeration value (EQ,
2135 LT, GT, UN), in fact we only need LEQ, and so we may as well
2136 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00002137
sewardje4cce742011-02-24 15:25:24 +00002138 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2139 invald ThrID). In the latter case, the returned ThrID indicates
2140 the discovered point for which they are not. There may be more
2141 than one such point, but we only care about seeing one of them, not
2142 all of them. This rather strange convention is used because
2143 sometimes we want to know the actual index at which they first
2144 differ. */
2145static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002146
2147/* Compute an arbitrary structural (total) ordering on the two args,
2148 based on their VCs, so they can be looked up in a table, tree, etc.
2149 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00002150static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002151
florianb28fe892014-10-28 20:52:07 +00002152/* Debugging only. Display the given VTS. */
2153static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002154
2155/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00002156static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002157
sewardjffce8152011-06-24 10:09:41 +00002158/* Notify the VTS machinery that a thread has been declared
2159 comprehensively dead: that is, it has done an async exit AND it has
2160 been joined with. This should ensure that its local clocks (.viR
2161 and .viW) will never again change, and so all mentions of this
2162 thread from all VTSs in the system may be removed. */
2163static void VTS__declare_thread_very_dead ( Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002164
2165/*--------------- to do with Vector Timestamps ---------------*/
2166
sewardjf98e1c02008-10-25 16:22:41 +00002167static Bool is_sane_VTS ( VTS* vts )
2168{
2169 UWord i, n;
2170 ScalarTS *st1, *st2;
2171 if (!vts) return False;
sewardj555fc572011-02-27 23:39:53 +00002172 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00002173 n = vts->usedTS;
2174 if (n == 1) {
2175 st1 = &vts->ts[0];
2176 if (st1->tym == 0)
2177 return False;
2178 }
2179 else
sewardjf98e1c02008-10-25 16:22:41 +00002180 if (n >= 2) {
2181 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002182 st1 = &vts->ts[i];
2183 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00002184 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002185 return False;
2186 if (st1->tym == 0 || st2->tym == 0)
2187 return False;
2188 }
2189 }
2190 return True;
2191}
2192
2193
sewardj7aa38a92011-02-27 23:04:12 +00002194/* Create a new, empty VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002195*/
florian6bd9dc12012-11-23 16:17:43 +00002196static VTS* VTS__new ( const HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00002197{
sewardj7aa38a92011-02-27 23:04:12 +00002198 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2199 tl_assert(vts->usedTS == 0);
2200 vts->sizeTS = sizeTS;
2201 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00002202 return vts;
2203}
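
/* Note the extra slot at index sizeTS allocated above: it holds the
   0x0ddC0ffeeBadF00dULL value just written, an overrun canary that
   VTS__clone, VTS__subtract and VTS__delete re-check before trusting
   the storage. */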
2204
sewardj7aa38a92011-02-27 23:04:12 +00002205/* Clone this VTS.
2206*/
florian6bd9dc12012-11-23 16:17:43 +00002207static VTS* VTS__clone ( const HChar* who, VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002208{
2209 tl_assert(vts);
2210 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2211 UInt nTS = vts->usedTS;
2212 VTS* clone = VTS__new(who, nTS);
2213 clone->id = vts->id;
2214 clone->sizeTS = nTS;
2215 clone->usedTS = nTS;
2216 UInt i;
2217 for (i = 0; i < nTS; i++) {
2218 clone->ts[i] = vts->ts[i];
2219 }
2220 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2221 return clone;
2222}
2223
sewardjf98e1c02008-10-25 16:22:41 +00002224
sewardjffce8152011-06-24 10:09:41 +00002225/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2226 must be in strictly increasing order. We could obviously do this
2227 much more efficiently (in linear time) if necessary.
2228*/
florian6bd9dc12012-11-23 16:17:43 +00002229static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
sewardjffce8152011-06-24 10:09:41 +00002230{
2231 UInt i, j;
2232 tl_assert(vts);
2233 tl_assert(thridsToDel);
2234 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2235 UInt nTS = vts->usedTS;
2236 /* Figure out how many ScalarTSs will remain in the output. */
2237 UInt nReq = nTS;
2238 for (i = 0; i < nTS; i++) {
2239 ThrID thrid = vts->ts[i].thrid;
2240 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2241 nReq--;
2242 }
2243 tl_assert(nReq <= nTS);
2244 /* Copy the ones that will remain. */
2245 VTS* res = VTS__new(who, nReq);
2246 j = 0;
2247 for (i = 0; i < nTS; i++) {
2248 ThrID thrid = vts->ts[i].thrid;
2249 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2250 continue;
2251 res->ts[j++] = vts->ts[i];
2252 }
2253 tl_assert(j == nReq);
2254 tl_assert(j == res->sizeTS);
2255 res->usedTS = j;
2256 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2257 return res;
2258}
2259
2260
sewardjf98e1c02008-10-25 16:22:41 +00002261/* Delete this VTS in its entirety.
2262*/
sewardj7aa38a92011-02-27 23:04:12 +00002263static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002264{
2265 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002266 tl_assert(vts->usedTS <= vts->sizeTS);
2267 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002268 HG_(free)(vts);
2269}
2270
2271
2272/* Create a new singleton VTS.
2273*/
sewardj7aa38a92011-02-27 23:04:12 +00002274static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2275{
sewardjf98e1c02008-10-25 16:22:41 +00002276 tl_assert(thr);
2277 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002278 tl_assert(out);
2279 tl_assert(out->usedTS == 0);
2280 tl_assert(out->sizeTS >= 1);
2281 UInt hi = out->usedTS++;
2282 out->ts[hi].thrid = Thr__to_ThrID(thr);
2283 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002284}
2285
2286
 2287/* Create in 'out' a VTS which is the same as 'vts' except with
 2288 vts[me]++, so to speak. 'vts' itself is not modified.
2289*/
sewardj7aa38a92011-02-27 23:04:12 +00002290static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002291{
sewardj7aa38a92011-02-27 23:04:12 +00002292 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002293 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002294 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002295
2296 stats__vts__tick++;
2297
sewardj7aa38a92011-02-27 23:04:12 +00002298 tl_assert(out);
2299 tl_assert(out->usedTS == 0);
2300 if (vts->usedTS >= ThrID_MAX_VALID)
2301 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2302 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2303
sewardjf98e1c02008-10-25 16:22:41 +00002304 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002305 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002306 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002307 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002308
sewardj555fc572011-02-27 23:39:53 +00002309 /* Copy all entries which precede 'me'. */
2310 for (i = 0; i < n; i++) {
2311 ScalarTS* here = &vts->ts[i];
2312 if (UNLIKELY(here->thrid >= me_thrid))
2313 break;
2314 UInt hi = out->usedTS++;
2315 out->ts[hi] = *here;
2316 }
2317
2318 /* 'i' now indicates the next entry to copy, if any.
2319 There are 3 possibilities:
2320 (a) there is no next entry (we used them all up already):
2321 add (me_thrid,1) to the output, and quit
2322 (b) there is a next entry, and its thrid > me_thrid:
2323 add (me_thrid,1) to the output, then copy the remaining entries
2324 (c) there is a next entry, and its thrid == me_thrid:
2325 copy it to the output but increment its timestamp value.
2326 Then copy the remaining entries. (c) is the common case.
2327 */
2328 tl_assert(i >= 0 && i <= n);
2329 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002330 UInt hi = out->usedTS++;
2331 out->ts[hi].thrid = me_thrid;
2332 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002333 } else {
2334 /* cases (b) and (c) */
2335 ScalarTS* here = &vts->ts[i];
2336 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002337 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002338 /* We're hosed. We have to stop. */
2339 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2340 }
sewardj7aa38a92011-02-27 23:04:12 +00002341 UInt hi = out->usedTS++;
2342 out->ts[hi].thrid = here->thrid;
2343 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002344 i++;
sewardj555fc572011-02-27 23:39:53 +00002345 found = True;
2346 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002347 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002348 out->ts[hi].thrid = me_thrid;
2349 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002350 }
sewardj555fc572011-02-27 23:39:53 +00002351 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002352 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002353 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002354 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002355 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002356 }
2357 }
sewardj555fc572011-02-27 23:39:53 +00002358
sewardj7aa38a92011-02-27 23:04:12 +00002359 tl_assert(is_sane_VTS(out));
2360 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2361 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002362}
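
/* Worked example (with illustrative thrids): ticking t2 in
   [ t1:3, t2:5 ] yields [ t1:3, t2:6 ] -- case (c).  Ticking a
   thread t0 with t0 < t1 yields [ t0:1, t1:3, t2:5 ] -- case (b),
   and ticking t3 with t3 > t2 yields [ t1:3, t2:5, t3:1 ] -- case
   (a).  Entries remain sorted by ThrID throughout. */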
2363
2364
 2365/* Compute into 'out' the join (max) of the 2 args.
 2366 Neither arg is modified.
2367*/
sewardj7aa38a92011-02-27 23:04:12 +00002368static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002369{
sewardj7aa38a92011-02-27 23:04:12 +00002370 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002371 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002372 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002373 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002374
sewardjc8028ad2010-05-05 09:34:42 +00002375 stats__vts__join++;
2376
sewardj7aa38a92011-02-27 23:04:12 +00002377 tl_assert(a);
2378 tl_assert(b);
2379 useda = a->usedTS;
2380 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002381
sewardj7aa38a92011-02-27 23:04:12 +00002382 tl_assert(out);
2383 tl_assert(out->usedTS == 0);
2384 /* overly conservative test, but doing better involves comparing
2385 the two VTSs, which we don't want to do at this point. */
2386 if (useda + usedb >= ThrID_MAX_VALID)
2387 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2388 tl_assert(out->sizeTS >= useda + usedb);
2389
sewardjf98e1c02008-10-25 16:22:41 +00002390 ia = ib = 0;
2391
2392 while (1) {
2393
sewardje4cce742011-02-24 15:25:24 +00002394 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2395 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002396 occurring in either a or b, and tyma/b are the relevant
2397 scalar timestamps, taking into account implicit zeroes. */
2398 tl_assert(ia >= 0 && ia <= useda);
2399 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002400
njn4c245e52009-03-15 23:25:38 +00002401 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002402 /* both empty - done */
2403 break;
njn4c245e52009-03-15 23:25:38 +00002404
2405 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002406 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002407 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002408 thrid = tmpb->thrid;
2409 tyma = 0;
2410 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002411 ib++;
njn4c245e52009-03-15 23:25:38 +00002412
2413 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002414 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002415 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002416 thrid = tmpa->thrid;
2417 tyma = tmpa->tym;
2418 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002419 ia++;
njn4c245e52009-03-15 23:25:38 +00002420
2421 } else {
sewardje4cce742011-02-24 15:25:24 +00002422 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002423 ScalarTS* tmpa = &a->ts[ia];
2424 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002425 if (tmpa->thrid < tmpb->thrid) {
2426 /* a has the lowest unconsidered ThrID */
2427 thrid = tmpa->thrid;
2428 tyma = tmpa->tym;
2429 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002430 ia++;
sewardje4cce742011-02-24 15:25:24 +00002431 } else if (tmpa->thrid > tmpb->thrid) {
2432 /* b has the lowest unconsidered ThrID */
2433 thrid = tmpb->thrid;
2434 tyma = 0;
2435 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002436 ib++;
2437 } else {
sewardje4cce742011-02-24 15:25:24 +00002438 /* they both next mention the same ThrID */
2439 tl_assert(tmpa->thrid == tmpb->thrid);
2440 thrid = tmpa->thrid; /* == tmpb->thrid */
2441 tyma = tmpa->tym;
2442 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002443 ia++;
2444 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002445 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002446 }
2447 }
2448
2449 /* having laboriously determined (thr, tyma, tymb), do something
2450 useful with it. */
2451 tymMax = tyma > tymb ? tyma : tymb;
2452 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002453 UInt hi = out->usedTS++;
2454 out->ts[hi].thrid = thrid;
2455 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002456 }
2457
2458 }
2459
sewardj7aa38a92011-02-27 23:04:12 +00002460 tl_assert(is_sane_VTS(out));
2461 tl_assert(out->usedTS <= out->sizeTS);
2462 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002463}
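
/* Worked example (with illustrative thrids): joining [ t1:3, t2:5 ]
   with [ t2:4, t3:1 ] yields [ t1:3, t2:5, t3:1 ]: the per-thread
   maximum is taken, threads missing from one side are treated as
   having an implicit timestamp of zero, and zero maxima are not
   emitted. */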
2464
2465
sewardje4cce742011-02-24 15:25:24 +00002466/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2467 they are, or the first ThrID for which they are not (no valid ThrID
2468 has the value zero). This rather strange convention is used
2469 because sometimes we want to know the actual index at which they
2470 first differ. */
2471static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002472{
sewardj23f12002009-07-24 08:45:08 +00002473 Word ia, ib, useda, usedb;
2474 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002475
sewardjc8028ad2010-05-05 09:34:42 +00002476 stats__vts__cmpLEQ++;
2477
sewardj7aa38a92011-02-27 23:04:12 +00002478 tl_assert(a);
2479 tl_assert(b);
2480 useda = a->usedTS;
2481 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002482
2483 ia = ib = 0;
2484
2485 while (1) {
2486
njn4c245e52009-03-15 23:25:38 +00002487 /* This logic is to enumerate doubles (tyma, tymb) drawn
2488 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002489 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002490 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002491
sewardjf98e1c02008-10-25 16:22:41 +00002492 tl_assert(ia >= 0 && ia <= useda);
2493 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002494
njn4c245e52009-03-15 23:25:38 +00002495 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002496 /* both empty - done */
2497 break;
njn4c245e52009-03-15 23:25:38 +00002498
2499 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002500 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002501 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002502 tyma = 0;
2503 tymb = tmpb->tym;
2504 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002505 ib++;
njn4c245e52009-03-15 23:25:38 +00002506
2507 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002508 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002509 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002510 tyma = tmpa->tym;
2511 thrid = tmpa->thrid;
2512 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002513 ia++;
njn4c245e52009-03-15 23:25:38 +00002514
2515 } else {
sewardje4cce742011-02-24 15:25:24 +00002516 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002517 ScalarTS* tmpa = &a->ts[ia];
2518 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002519 if (tmpa->thrid < tmpb->thrid) {
2520 /* a has the lowest unconsidered ThrID */
2521 tyma = tmpa->tym;
2522 thrid = tmpa->thrid;
2523 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002524 ia++;
2525 }
2526 else
sewardje4cce742011-02-24 15:25:24 +00002527 if (tmpa->thrid > tmpb->thrid) {
2528 /* b has the lowest unconsidered ThrID */
2529 tyma = 0;
2530 tymb = tmpb->tym;
2531 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002532 ib++;
2533 } else {
sewardje4cce742011-02-24 15:25:24 +00002534 /* they both next mention the same ThrID */
2535 tl_assert(tmpa->thrid == tmpb->thrid);
2536 tyma = tmpa->tym;
2537 thrid = tmpa->thrid;
2538 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002539 ia++;
2540 ib++;
2541 }
2542 }
2543
njn4c245e52009-03-15 23:25:38 +00002544 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002545 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002546 if (tyma > tymb) {
2547 /* not LEQ at this index. Quit, since the answer is
2548 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002549 tl_assert(thrid >= 1024);
2550 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002551 }
sewardjf98e1c02008-10-25 16:22:41 +00002552 }
2553
sewardje4cce742011-02-24 15:25:24 +00002554 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002555}
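
/* Worked example (with illustrative thrids): cmpLEQ([t1:2], [t1:3])
   is 0, since 2 <= 3 at every index.  cmpLEQ([t1:4], [t1:3]) is t1,
   the first ThrID at which LEQ fails.  Implicit zeroes matter:
   cmpLEQ([t1:1], [t2:9]) is t1, because b's timestamp for t1 is
   implicitly 0, and 1 > 0. */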
2556
2557
2558/* Compute an arbitrary structural (total) ordering on the two args,
2559 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002560 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
 2561 performance critical so there is some effort expended to make it as
2562 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002563*/
2564Word VTS__cmp_structural ( VTS* a, VTS* b )
2565{
2566 /* We just need to generate an arbitrary total ordering based on
2567 a->ts and b->ts. Preferably do it in a way which comes across likely
2568 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002569 Word i;
2570 Word useda = 0, usedb = 0;
2571 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002572
sewardjc8028ad2010-05-05 09:34:42 +00002573 stats__vts__cmp_structural++;
2574
2575 tl_assert(a);
2576 tl_assert(b);
2577
sewardj7aa38a92011-02-27 23:04:12 +00002578 ctsa = &a->ts[0]; useda = a->usedTS;
2579 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002580
2581 if (LIKELY(useda == usedb)) {
2582 ScalarTS *tmpa = NULL, *tmpb = NULL;
2583 stats__vts__cmp_structural_slow++;
2584 /* Same length vectors. Find the first difference, if any, as
2585 fast as possible. */
2586 for (i = 0; i < useda; i++) {
2587 tmpa = &ctsa[i];
2588 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002589 if (LIKELY(tmpa->tym == tmpb->tym
2590 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002591 continue;
2592 else
2593 break;
2594 }
2595 if (UNLIKELY(i == useda)) {
2596 /* They're identical. */
2597 return 0;
2598 } else {
2599 tl_assert(i >= 0 && i < useda);
2600 if (tmpa->tym < tmpb->tym) return -1;
2601 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002602 if (tmpa->thrid < tmpb->thrid) return -1;
2603 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002604 /* we just established them as non-identical, hence: */
2605 }
2606 /*NOTREACHED*/
2607 tl_assert(0);
2608 }
sewardjf98e1c02008-10-25 16:22:41 +00002609
2610 if (useda < usedb) return -1;
2611 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002612 /*NOTREACHED*/
2613 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002614}
2615
2616
florianb28fe892014-10-28 20:52:07 +00002617/* Debugging only. Display the given VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002618*/
florianb28fe892014-10-28 20:52:07 +00002619static void VTS__show ( const VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002620{
sewardjf98e1c02008-10-25 16:22:41 +00002621 Word i, n;
florian4367abe2015-02-28 09:22:09 +00002622 tl_assert(vts);
florianb28fe892014-10-28 20:52:07 +00002623
2624 VG_(printf)("[");
sewardj7aa38a92011-02-27 23:04:12 +00002625 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002626 for (i = 0; i < n; i++) {
florianb28fe892014-10-28 20:52:07 +00002627 const ScalarTS *st = &vts->ts[i];
florian5e5cb002015-08-03 21:21:42 +00002628 VG_(printf)(i < n-1 ? "%d:%llu " : "%d:%llu", st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002629 }
florianb28fe892014-10-28 20:52:07 +00002630 VG_(printf)("]");
sewardjf98e1c02008-10-25 16:22:41 +00002631}
2632
2633
2634/* Debugging only. Return vts[index], so to speak.
2635*/
sewardj7aa38a92011-02-27 23:04:12 +00002636ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2637{
sewardjf98e1c02008-10-25 16:22:41 +00002638 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002639 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002640 stats__vts__indexat_slow++;
florian4367abe2015-02-28 09:22:09 +00002641 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002642 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002643 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002644 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002645 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002646 return st->tym;
2647 }
2648 return 0;
2649}
2650
2651
sewardjffce8152011-06-24 10:09:41 +00002652/* See comment on prototype above.
2653*/
2654static void VTS__declare_thread_very_dead ( Thr* thr )
2655{
2656 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2657
2658 tl_assert(thr->llexit_done);
2659 tl_assert(thr->joinedwith_done);
2660
2661 ThrID nyu;
2662 nyu = Thr__to_ThrID(thr);
philippec3508652015-03-28 12:01:58 +00002663 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
sewardjffce8152011-06-24 10:09:41 +00002664
2665 /* We can only get here if we're assured that we'll never again
2666 need to look at this thread's ::viR or ::viW. Set them to
2667 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2668 mostly so that we don't wind up pruning them (as that would be
2669 nonsensical: the only interesting ScalarTS entry for a dead
2670 thread is its own index, and the pruning will remove that.). */
2671 VtsID__rcdec(thr->viR);
2672 VtsID__rcdec(thr->viW);
2673 thr->viR = VtsID_INVALID;
2674 thr->viW = VtsID_INVALID;
2675}
2676
2677
sewardjf98e1c02008-10-25 16:22:41 +00002678/////////////////////////////////////////////////////////////////
2679/////////////////////////////////////////////////////////////////
2680// //
2681// SECTION END vts primitives //
2682// //
2683/////////////////////////////////////////////////////////////////
2684/////////////////////////////////////////////////////////////////
2685
2686
2687
2688/////////////////////////////////////////////////////////////////
2689/////////////////////////////////////////////////////////////////
2690// //
2691// SECTION BEGIN main library //
2692// //
2693/////////////////////////////////////////////////////////////////
2694/////////////////////////////////////////////////////////////////
2695
2696
2697/////////////////////////////////////////////////////////
2698// //
2699// VTS set //
2700// //
2701/////////////////////////////////////////////////////////
2702
sewardjffce8152011-06-24 10:09:41 +00002703static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002704
2705static void vts_set_init ( void )
2706{
2707 tl_assert(!vts_set);
2708 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2709 HG_(free),
2710 (Word(*)(UWord,UWord))VTS__cmp_structural );
sewardjf98e1c02008-10-25 16:22:41 +00002711}
2712
sewardj7aa38a92011-02-27 23:04:12 +00002713/* Given a VTS, look in vts_set to see if we already have a
2714 structurally identical one. If yes, return the pair (True, pointer
2715 to the existing one). If no, clone this one, add the clone to the
2716 set, and return (False, pointer to the clone). */
2717static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002718{
2719 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002720 stats__vts_set__focaa++;
2721 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002722 /* lookup cand (by value) */
2723 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2724 /* found it */
2725 tl_assert(valW == 0);
2726 /* if this fails, cand (by ref) was already present (!) */
2727 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002728 *res = (VTS*)keyW;
2729 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002730 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002731 /* not present. Clone, add and return address of clone. */
2732 stats__vts_set__focaa_a++;
2733 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2734 tl_assert(clone != cand);
2735 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2736 *res = clone;
2737 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002738 }
2739}
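
/* This is classic hash-consing: since every VTS in vts_set is unique
   by structural value, one physical copy can be shared by arbitrarily
   many owners, and later code can test VTS equality by comparing
   VtsIDs alone. */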
2740
2741
2742/////////////////////////////////////////////////////////
2743// //
2744// VTS table //
2745// //
2746/////////////////////////////////////////////////////////
2747
2748static void VtsID__invalidate_caches ( void ); /* fwds */
2749
2750/* A type to hold VTS table entries. Invariants:
2751 If .vts == NULL, then this entry is not in use, so:
2752 - .rc == 0
2753 - this entry is on the freelist (unfortunately, does not imply
philippea1ac2f42015-05-01 17:12:00 +00002754 any constraints on value for u.freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002755 If .vts != NULL, then this entry is in use:
2756 - .vts is findable in vts_set
2757 - .vts->id == this entry number
2758 - no specific value for .rc (even 0 is OK)
philippea1ac2f42015-05-01 17:12:00 +00002759 - this entry is not on freelist, so u.freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002760*/
2761typedef
2762 struct {
2763 VTS* vts; /* vts, in vts_set */
2764 UWord rc; /* reference count - enough for entire aspace */
philippea1ac2f42015-05-01 17:12:00 +00002765 union {
2766 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2767 VtsID remap; /* used only during pruning, for used entries */
2768 } u;
2769 /* u.freelink only used when vts == NULL,
2770 u.remap only used when vts != NULL, during pruning. */
sewardjf98e1c02008-10-25 16:22:41 +00002771 }
2772 VtsTE;
2773
2774/* The VTS table. */
2775static XArray* /* of VtsTE */ vts_tab = NULL;
2776
2777/* An index into the VTS table, indicating the start of the list of
2778 free (available for use) entries. If the list is empty, this is
2779 VtsID_INVALID. */
2780static VtsID vts_tab_freelist = VtsID_INVALID;
2781
2782/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2783 vts_tab equals or exceeds this size. After GC, the value here is
2784 set appropriately so as to check for the next GC point. */
2785static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                         HG_(free), sizeof(VtsTE) );
   vts_tab_freelist = VtsID_INVALID;
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->u.freelink == VtsID_INVALID);
   ie->u.freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->u.freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.u.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->u.freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}
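
/* Illustrative sketch, not part of the real implementation: a typical
   interning round trip through the function above, assuming a caller
   that has built a candidate VTS 'scratch' with id == VtsID_INVALID. */
#if 0
static VtsID intern_VTS_example ( VTS* scratch )
{
   VtsID  ii = vts_tab__find__or__clone_and_add( scratch );
   VtsTE* te = VG_(indexXA)( vts_tab, ii );
   /* The table owns a clone, so 'scratch' can be reused or freed.
      Interning is idempotent: a structurally identical candidate
      must come back with this same VtsID. */
   tl_assert(te->vts->id == ii);
   return ii;
}
#endif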


static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID  old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->u.remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   stats__vts_tab_GC++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%lu  old size %lu  live %lu  (%2llu%%)\n",
                   stats__vts_tab_GC,
                   nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;
   /* No need to do pruning if no thread died since the last pruning,
      as then no VtsTE can be pruned. */
   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
      return;

   /* ---------- BEGIN VTS PRUNING ---------- */
   /* Sort and check the very dead threads that died since the last pruning.
      Sorting is used for the check and so that we can quickly look
      up the dead-thread entries as we work through the VTSs. */
   verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);

   /* We will run through the old table, and create a new table and
      set, at the same time setting the u.remap entries in the old
      table to point to the new entries.  Then, visit every VtsID in
      the system, and replace all of them with new ones, using the
      u.remap entries in the old table.  Finally, we can delete the old
      table and set. */

   XArray* /* of VtsTE */ new_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
                    HG_(free), sizeof(VtsTE) );

   /* WordFM VTS* void */
   WordFM* new_set
      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
                    HG_(free),
                    (Word(*)(UWord,UWord))VTS__cmp_structural );

   /* Visit each old VTS.  For each one:

      * make a pruned version

      * search new_set for the pruned version, yielding either
        Nothing (not present) or the new VtsID for it.

      * if not present, allocate a new VtsID for it, insert (pruned
        VTS, new VtsID) in the tree, and set
        remap_table[old VtsID] = new VtsID.

      * if present, set remap_table[old VtsID] = new VtsID, where
        new VtsID was determined by the tree lookup.  Then free up
        the clone.
   */

   UWord nBeforePruning = 0, nAfterPruning = 0;
   UWord nSTSsBefore = 0, nSTSsAfter = 0;
   VtsID new_VtsID_ctr = 0;

   for (i = 0; i < nTab; i++) {

      /* For each old VTS .. */
      VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
      VTS*   old_vts = old_te->vts;

      /* Skip it if not in use */
      if (old_te->rc == 0) {
         tl_assert(old_vts == NULL);
         continue;
      }
      tl_assert(old_te->u.remap == VtsID_INVALID);
      tl_assert(old_vts != NULL);
      tl_assert(old_vts->id == i);
      tl_assert(old_vts->ts != NULL);

      /* It is in use. Make a pruned version. */
      nBeforePruning++;
      nSTSsBefore += old_vts->usedTS;
      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
                                   old_vts, verydead_thread_table_not_pruned);
      tl_assert(new_vts->sizeTS == new_vts->usedTS);
      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
                == 0x0ddC0ffeeBadF00dULL);

      /* Get rid of the old VTS and the tree entry.  It's a bit more
         complex to incrementally delete the VTSs now than to nuke
         them all after we're done, but the upside is that we don't
         wind up temporarily storing potentially two complete copies
         of each VTS and hence spiking memory use. */
      UWord oldK = 0, oldV = 12345;
      Bool  present = VG_(delFromFM)( vts_set,
                                      &oldK, &oldV, (UWord)old_vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(old_vts);
      old_te->vts = NULL;
      old_vts = NULL;

      /* NO MENTIONS of old_vts allowed beyond this point. */

      /* Ok, we have the pruned copy in new_vts.  See if a
         structurally identical version is already present in new_set.
         If so, delete the one we just made and move on; if not, add
         it. */
      VTS*  identical_version = NULL;
      UWord valW = 12345;
      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
                        (UWord)new_vts)) {
         // already have it
         tl_assert(valW == 0);
         tl_assert(identical_version != NULL);
         tl_assert(identical_version != new_vts);
         VTS__delete(new_vts);
         new_vts = identical_version;
         tl_assert(new_vts->id != VtsID_INVALID);
      } else {
         tl_assert(valW == 12345);
         tl_assert(identical_version == NULL);
         new_vts->id = new_VtsID_ctr++;
         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
         tl_assert(!b);
         VtsTE new_te;
         new_te.vts        = new_vts;
         new_te.rc         = 0;
         new_te.u.freelink = VtsID_INVALID;
         Word j = VG_(addToXA)( new_tab, &new_te );
         tl_assert(j <= i);
         tl_assert(j == new_VtsID_ctr - 1);
         // stats
         nAfterPruning++;
         nSTSsAfter += new_vts->usedTS;
      }
      old_te->u.remap = new_vts->id;

   } /* for (i = 0; i < nTab; i++) */

   /* Move the very dead threads from verydead_thread_table_not_pruned to
      verydead_thread_table.  Sort and check verydead_thread_table
      to verify a thread was reported very dead only once. */
   {
      UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);

      for (i = 0; i < nBT; i++) {
         ThrID thrid =
            *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
         VG_(addToXA)( verydead_thread_table, &thrid );
      }
      verydead_thread_table_sort_and_check (verydead_thread_table);
      VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
   }

   /* At this point, we have:
      * the old VTS table, with its u.remap entries set,
        and with all .vts == NULL.
      * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted as we worked
        through the old table.
      * the new VTS table, with all .rc == 0, all u.freelink and u.remap
        == VtsID_INVALID.
      * the new VTS tree.
   */
   tl_assert( VG_(sizeFM)(vts_set) == 0 );

   /* Now actually apply the mapping. */
   /* Visit all the VtsIDs in the entire system.  Where do we expect
      to find them?
      (a) in shadow memory -- the LineZs and LineFs
      (b) in our collection of struct _Thrs.
      (c) in our collection of struct _SOs.
      Nowhere else, AFAICS.  Not in the zsm cache, because that just
      got invalidated.

      Using the u.remap fields in vts_tab, map each old VtsID to a new
      VtsID.  For each old VtsID, dec its rc; and for each new one,
      inc it.  This sets up the new refcounts, and it also gives a
      cheap sanity check of the old ones: all old refcounts should be
      zero after this operation.
   */

   /* Do the mappings for (a) above: iterate over the Primary shadow
      mem map (WordFM Addr SecMap*). */
   UWord secmapW = 0;
   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            for (j = 0; j < 4; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
         } else {
            LineF* lineF = SVal2Ptr (lineZ->dict[1]);
            for (j = 0; j < N_LINE_ARANGE; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
         }
      }
   }
   VG_(doneIterFM)( map_shmem );

   /* Do the mappings for (b) above: visit our collection of struct
      _Thrs. */
   Thread* hgthread = get_admin_threads();
   tl_assert(hgthread);
   while (hgthread) {
      Thr* hbthr = hgthread->hbthr;
      tl_assert(hbthr);
      /* Threads that are listed in the prunable set have their viR
         and viW set to VtsID_INVALID, so we can't mess with them. */
      if (hbthr->llexit_done && hbthr->joinedwith_done) {
         tl_assert(hbthr->viR == VtsID_INVALID);
         tl_assert(hbthr->viW == VtsID_INVALID);
         hgthread = hgthread->admin;
         continue;
      }
      remap_VtsID( vts_tab, new_tab, &hbthr->viR );
      remap_VtsID( vts_tab, new_tab, &hbthr->viW );
      hgthread = hgthread->admin;
   }

   /* Do the mappings for (c) above: visit the struct _SOs. */
   SO* so = admin_SO;
   while (so) {
      if (so->viR != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viR );
      if (so->viW != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viW );
      so = so->admin_next;
   }

   /* So, we're nearly done (with this incredibly complex operation).
      Check the refcounts for the old VtsIDs all fell to zero, as
      expected.  Any failure is serious. */
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts == NULL);
      /* This is the assert proper.  Note we're also asserting
         zeroness for old entries which are unmapped.  That's OK. */
      tl_assert(te->rc == 0);
   }

   /* Install the new table and set. */
   VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
   vts_set = new_set;
   VG_(deleteXA)( vts_tab );
   vts_tab = new_tab;

   /* The freelist of vts_tab entries is empty now, because we've
      compacted all of the live entries at the low end of the
      table. */
   vts_tab_freelist = VtsID_INVALID;

   /* Sanity check vts_set and vts_tab. */

   /* Because all the live entries got slid down to the bottom of vts_tab: */
   tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));

   /* Assert that the vts_tab and vts_set entries point at each other
      in the required way */
   UWord wordK = 0, wordV = 0;
   VG_(initIterFM)( vts_set );
   while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
      tl_assert(wordK != 0);
      tl_assert(wordV == 0);
      VTS* vts = (VTS*)wordK;
      tl_assert(vts->id != VtsID_INVALID);
      VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
      tl_assert(te->vts == vts);
   }
   VG_(doneIterFM)( vts_set );

   /* Also iterate over the table, and check each entry is
      plausible. */
   nTab = VG_(sizeXA)( vts_tab );
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts);
      tl_assert(te->vts->id == i);
      tl_assert(te->rc > 0); /* 'cos we just GC'd */
      tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
      /* value of te->u.remap not relevant */
   }

   /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
   stats__vts_pruning++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(
         Vg_DebugMsg,
         "libhb: VTS PR: #%lu  before %lu (avg sz %lu)  "
            "after %lu (avg sz %lu)\n",
         stats__vts_pruning,
         nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
         nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
      );
   }
   /* ---------- END VTS PRUNING ---------- */
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Vts IDs                       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A temporary, max-sized VTS, used as scratch (the first argument)
   in VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( vts );
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
   any, really) element in vi1 which is pointwise greater-than the
   corresponding element in vi2.  If no such element exists, return
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}
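
/* Illustrative sketch, not part of the real implementation: each byte
   of a 32-byte filter line is tracked by two adjacent bits of a
   UShort, 8 bytes per UShort, which is what the loff/mask arithmetic
   above computes.  A hypothetical worked case, for a byte 13 bytes
   into a line: */
#if 0
static void filter_mask_worked_example ( void )
{
   Addr   a    = 13;                        /* offset within a line */
   UWord  loff = (a & (FI_LINE_SZB-1)) / 8; /* byte 13 lives in u16s[1] */
   UShort mask = 0x3 << (2 * (a & 7));      /* its 2 bits: 0x3 << 10 */
   tl_assert(loff == 1);
   tl_assert(mask == 0x0C00);
}
#endif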

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

/* Only used to verify the fast Filter__clear_range */
__attribute__((unused))
static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
{
   tl_assert (CHECK_ZSM);

   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
#  if CHECK_ZSM > 0
   /* We check the below more complex algorithm with the simple one.
      This check is very expensive: we first do it the slow way on a
      copy of the data, then do it the fast way.  On RETURN, we check
      that the two results are equal. */
   Filter fi_check = *fi;
   Filter__clear_range_SLOW(&fi_check, a, len);
#  define RETURN goto check_and_return
#  else
#  define RETURN return
#  endif

   Addr begtag = FI_GET_TAG(a);       /* tag of range begin */

   Addr end = a + len - 1;
   Addr endtag = FI_GET_TAG(end);     /* tag of range end. */

   UWord rlen = len;                  /* remaining length to clear */

   Addr    c = a;                      /* Current position we are clearing. */
   UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
   FiLine* cline;                      /* Current line we are clearing */
   UWord   cloff;                      /* Current offset in line we are
                                          clearing, when clearing partial
                                          lines. */

   UShort u16;

   STATIC_ASSERT (FI_LINE_SZB == 32);
   // Below assumes filter lines are 32 bytes

   if (LIKELY(fi->tags[clineno] == begtag)) {
      /* LIKELY for the heavy caller VG_(unknown_SP_update). */
      /* First filter line matches begtag.
         If c is not at the start of the filter line, the below will
         clear the filter line bytes starting from c. */
      cline = &fi->lines[clineno];
      cloff = (c - begtag) / 8;

      /* First the byte(s) needed to reach 8-alignment */
      if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
         /* hiB is the nr of bytes (higher addresses) from c to reach
            8-alignment. */
         UWord hiB = 8 - (c & 7);
         /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
            mask is  C000, F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
            range    7..7  6..7  5..7  4..7  3..7  2..7  1..7 */
         UShort mask = 0xFFFF << (16 - 2*hiB);

         u16 = cline->u16s[cloff];
         if (LIKELY(rlen >= hiB)) {
            cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
            rlen -= hiB;
            c += hiB;
            cloff += 1;
         } else {
            /* Only have the bits for rlen bytes. */
            mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
            cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
            RETURN;  // We have cleared all we can.
         }
      }
      /* c is now 8 aligned.  Clear by 8 aligned bytes,
         till c is filter-line aligned */
      while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
   } else {
      c = begtag + FI_LINE_SZB;
      if (c > end)
         RETURN;   // We have cleared all we can.
      rlen -= c - a;
   }
   // We have changed c, so re-establish clineno.
   clineno = FI_GET_LINENO(c);

   if (rlen >= FI_LINE_SZB) {
      /* Here, c is filter line-aligned.  Clear all the full lines
         contained in the range starting at c. */
      UWord nfull = rlen / FI_LINE_SZB;
      UWord full_len = nfull * FI_LINE_SZB;
      rlen -= full_len;
      if (nfull > FI_NUM_LINES)
         nfull = FI_NUM_LINES; // no need to check the same entry several times.

      for (UWord n = 0; n < nfull; n++) {
         if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
            cline = &fi->lines[clineno];
            cline->u16s[0] = 0;
            cline->u16s[1] = 0;
            cline->u16s[2] = 0;
            cline->u16s[3] = 0;
            STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
         }
         clineno++;
         if (UNLIKELY(clineno == FI_NUM_LINES))
            clineno = 0;
      }

      c += full_len;
      clineno = FI_GET_LINENO(c);
   }

   if (CHECK_ZSM) {
      tl_assert(VG_IS_8_ALIGNED(c));
      tl_assert(clineno == FI_GET_LINENO(c));
   }

   /* Do the last filter line, if it was not cleared as a full filter line */
   if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
      cline = &fi->lines[clineno];
      cloff = (c - endtag) / 8;
      if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);

      /* c is 8 aligned.  Clear by 8 aligned bytes, till we have less than
         8 bytes. */
      while (rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
      /* Then the remaining byte(s) */
      if (rlen > 0) {
         /* nr of bytes from c to reach end. */
         UWord loB = rlen;
         /* Compute mask representing loB bytes [c..c+loB[ :
            mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
         UShort mask = 0xFFFF >> (16 - 2*loB);

         u16 = cline->u16s[cloff];
         cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
      }
   }

#  if CHECK_ZSM > 0
  check_and_return:
   tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
#  endif
#  undef RETURN
}

/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}
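
/* Illustrative note, inferred from the mask values rather than stated
   by the original authors: the read handlers test only the R bit of
   each byte (0xAAAA-style masks) while the write handlers below test
   both bits of each byte (0xFFFF-style masks), so every read mask is
   a strict subset of the corresponding write mask.  A hypothetical
   check of that relation: */
#if 0
static void filter_rw_mask_relation_example ( void )
{
   tl_assert((0xAAAA & 0xFFFF) == 0xAAAA); /* crd64 subset of cwr64 */
   tl_assert((0x00AA & 0x00FF) == 0x00AA); /* crd32 subset of cwr32 */
   tl_assert((0x000A & 0x000F) == 0x000A); /* crd16 subset of cwr16 */
   tl_assert((0x0002 & 0x0003) == 0x0002); /* crd08 subset of cwr08 */
}
#endif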

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R bit set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                      Threads                        //
//                                                     //
/////////////////////////////////////////////////////////

/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}
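
/* Illustrative sketch, not part of the real implementation: the
   ThrID <-> slot mapping is a fixed offset of 1024, so a round trip
   through the two helpers above is the identity.  Assuming some valid
   'thr' already registered in thrid_to_thr_map: */
#if 0
static void thrid_roundtrip_example ( Thr* thr )
{
   ThrID tid = Thr__to_ThrID( thr );
   tl_assert( tid >= 1024 );
   tl_assert( Thr__from_ThrID( tid ) == thr ); /* identity round trip */
}
#endif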

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
                                       const ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                    Shadow Values                    //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}

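/* Illustrative sketch, not part of the real implementation: since
   VtsIDs fit in 30 bits (see the diagram above), packing two of them
   with SVal__mkC leaves the tag bits 00, i.e. a C-tagged SVal, and
   unpacking recovers both fields exactly.  With hypothetical ids
   5 and 7: */
#if 0
static void sval_pack_roundtrip_example ( void )
{
   SVal s = SVal__mkC( 5, 7 );
   tl_assert( SVal__isC(s) );
   tl_assert( SVal__unC_Rmin(s) == 5 );
   tl_assert( SVal__unC_Wmin(s) == 7 );
}
#endif
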
sewardj23f12002009-07-24 08:45:08 +00004146static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004147 return (2ULL << 62) == (s & SVAL_TAGMASK);
4148}
sewardj5aa09bf2014-06-20 14:25:53 +00004149__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00004150static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00004151 return 2ULL << 62;
4152}
4153
4154/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004155static inline void SVal__rcinc ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004156 if (SVal__isC(s)) {
4157 VtsID__rcinc( SVal__unC_Rmin(s) );
4158 VtsID__rcinc( SVal__unC_Wmin(s) );
4159 }
4160}
4161
4162/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004163static inline void SVal__rcdec ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004164 if (SVal__isC(s)) {
4165 VtsID__rcdec( SVal__unC_Rmin(s) );
4166 VtsID__rcdec( SVal__unC_Wmin(s) );
4167 }
4168}
4169
philippe71ed3c92015-05-17 19:32:42 +00004170static inline void *SVal2Ptr (SVal s)
4171{
4172 return (void*)(UWord)s;
4173}
4174
4175static inline SVal Ptr2SVal (void* ptr)
4176{
4177 return (SVal)(UWord)ptr;
4178}
4179
4180
sewardjf98e1c02008-10-25 16:22:41 +00004181
4182/////////////////////////////////////////////////////////
4183// //
4184// Change-event map2 //
4185// //
4186/////////////////////////////////////////////////////////
4187
sewardjf98e1c02008-10-25 16:22:41 +00004188/* This is in two parts:
4189
sewardj23f12002009-07-24 08:45:08 +00004190 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00004191 traces. When the reference count of a stack trace becomes zero,
4192 it is removed from the set and freed up. The intent is to have
4193 a set of stack traces which can be referred to from (2), but to
4194 only represent each one once. The set is indexed/searched by
4195 ordering on the stack trace vectors.
4196
philippe328d6622015-05-25 17:24:27 +00004197 2. A Hash table of OldRefs. These store information about each old
4198 ref that we need to record. Hash table key is the address of the
sewardjf98e1c02008-10-25 16:22:41 +00004199 location for which the information is recorded. For LRU
philippe328d6622015-05-25 17:24:27 +00004200 purposes, each OldRef in the hash table is also on a doubly
philippecabdbb52015-04-20 21:33:16 +00004201 linked list maintaining the order in which the OldRef were most
4202 recently accessed.
philippe328d6622015-05-25 17:24:27 +00004203 Each OldRef also maintains the stamp at which it was last accessed.
4204 With these stamps, we can quickly check which of 2 OldRef is the
4205 'newest', without having to scan the full list of LRU OldRef.
sewardjf98e1c02008-10-25 16:22:41 +00004206
philippe328d6622015-05-25 17:24:27 +00004207 The important part of an OldRef is, however, its acc component.
4208 This binds a TSW triple (thread, size, R/W) to an RCEC.
sewardjf98e1c02008-10-25 16:22:41 +00004209
philippecabdbb52015-04-20 21:33:16 +00004210 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef.
4211 Then we do exact LRU discarding. For each discarded OldRef we must
philippe328d6622015-05-25 17:24:27 +00004212 of course decrement the reference count on the RCEC it
sewardjf98e1c02008-10-25 16:22:41 +00004213 refers to, in order that entries from (1) eventually get
4214 discarded too.
4215*/
4216
philippea4b20c02015-05-23 12:25:22 +00004217static UWord stats__evm__lookup_found = 0;
4218static UWord stats__evm__lookup_notfound = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004219
philippe328d6622015-05-25 17:24:27 +00004220static UWord stats__ctxt_eq_tsw_eq_rcec = 0;
4221static UWord stats__ctxt_eq_tsw_neq_rcec = 0;
4222static UWord stats__ctxt_neq_tsw_neq_rcec = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004223static UWord stats__ctxt_rcdec_calls = 0;
philippe328d6622015-05-25 17:24:27 +00004224static UWord stats__ctxt_rcec_gc_discards = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004225
4226static UWord stats__ctxt_tab_curr = 0;
4227static UWord stats__ctxt_tab_max = 0;
4228
4229static UWord stats__ctxt_tab_qs = 0;
4230static UWord stats__ctxt_tab_cmps = 0;
4231
4232
4233///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00004234//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00004235///
4236
4237#define N_FRAMES 8
4238
4239// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4240#define RCEC_MAGIC 0xab88abb2UL
4241
4242//#define N_RCEC_TAB 98317 /* prime */
4243#define N_RCEC_TAB 196613 /* prime */
4244
4245typedef
4246 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00004247 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004248 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004249 UWord rc;
4250 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004251 UWord frames_hash; /* hash of all the frames */
4252 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004253 }
4254 RCEC;
4255
philippecabdbb52015-04-20 21:33:16 +00004256//////////// BEGIN RCEC pool allocator
4257static PoolAlloc* rcec_pool_allocator;
4258static RCEC* alloc_RCEC ( void ) {
4259 return VG_(allocEltPA) ( rcec_pool_allocator );
4260}
4261
4262static void free_RCEC ( RCEC* rcec ) {
4263 tl_assert(rcec->magic == RCEC_MAGIC);
4264 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4265}
4266//////////// END RCEC pool allocator
4267
sewardjf98e1c02008-10-25 16:22:41 +00004268static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4269
philippecabdbb52015-04-20 21:33:16 +00004270/* Count of allocated RCEC having ref count > 0 */
4271static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004272
4273/* Gives an arbitrary total order on RCEC .frames fields */
4274static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4275 Word i;
4276 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4277 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004278 if (ec1->frames_hash < ec2->frames_hash) return -1;
4279 if (ec1->frames_hash > ec2->frames_hash) return 1;
4280 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004281 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004282 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004283 }
4284 return 0;
4285}
4286
4287
4288/* Dec the ref of this RCEC. */
4289static void ctxt__rcdec ( RCEC* ec )
4290{
4291 stats__ctxt_rcdec_calls++;
4292 tl_assert(ec && ec->magic == RCEC_MAGIC);
4293 tl_assert(ec->rc > 0);
4294 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00004295 if (ec->rc == 0)
4296 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00004297}
4298
4299static void ctxt__rcinc ( RCEC* ec )
4300{
4301 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00004302 if (ec->rc == 0)
4303 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00004304 ec->rc++;
4305}
4306
4307
4308/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
florianad4e9792015-07-05 21:53:33 +00004309 move it one step closer to the front of the list, so as to make
sewardjf98e1c02008-10-25 16:22:41 +00004310 subsequent searches for it cheaper. */
4311static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4312{
4313 RCEC *ec0, *ec1, *ec2;
4314 if (ec == *headp)
4315 tl_assert(0); /* already at head of list */
4316 tl_assert(ec != NULL);
4317 ec0 = *headp;
4318 ec1 = NULL;
4319 ec2 = NULL;
4320 while (True) {
4321 if (ec0 == NULL || ec0 == ec) break;
4322 ec2 = ec1;
4323 ec1 = ec0;
4324 ec0 = ec0->next;
4325 }
4326 tl_assert(ec0 == ec);
4327 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4328 RCEC* tmp;
4329 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4330 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4331 closer to the start of the list. */
4332 tl_assert(ec2->next == ec1);
4333 tl_assert(ec1->next == ec0);
4334 tmp = ec0->next;
4335 ec2->next = ec0;
4336 ec0->next = ec1;
4337 ec1->next = tmp;
4338 }
4339 else
4340 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4341 /* it's second in the list. */
4342 tl_assert(*headp == ec1);
4343 tl_assert(ec1->next == ec0);
4344 ec1->next = ec0->next;
4345 ec0->next = ec1;
4346 *headp = ec0;
4347 }
4348}
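
/* Worked example of the transposition heuristic above (a sketch, not
   part of the build; 'sketch__transpose_demo' is hypothetical). With a
   chain a -> b -> c, a hit on c first yields a -> c -> b, and a second
   hit promotes c to the head: repeated hits migrate a hot entry
   forward one step at a time, a cheap variant of move-to-front. */
#if 0
static void sketch__transpose_demo ( void )
{
   RCEC a, b, c;
   RCEC* head = &a;
   a.next = &b; b.next = &c; c.next = NULL;
   move_RCEC_one_step_forward( &head, &c );
   /* general case: swap with predecessor */
   tl_assert(head == &a && a.next == &c && c.next == &b && b.next == NULL);
   move_RCEC_one_step_forward( &head, &c );
   /* second-in-list case: c becomes the new head */
   tl_assert(head == &c && c.next == &a && a.next == &b);
}
#endif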
4349
4350
4351/* Find the given RCEC in the hash table, and return a pointer to it. Or,
4352 if not present, add the given one to the tree (by making a copy of
4353 it, so the caller can immediately deallocate the original) and
4354 return a pointer to the copy. The caller can safely have 'example'
4355 on its stack, since we will always return a pointer to a copy of
4356 it, not to the original. Note that the inserted node will have .rc
florianad4e9792015-07-05 21:53:33 +00004357 of zero and so the caller must immediately increment it. */
sewardjf98e1c02008-10-25 16:22:41 +00004358__attribute__((noinline))
4359static RCEC* ctxt__find_or_add ( RCEC* example )
4360{
4361 UWord hent;
4362 RCEC* copy;
4363 tl_assert(example && example->magic == RCEC_MAGIC);
4364 tl_assert(example->rc == 0);
4365
4366 /* Search the hash table to see if we already have it. */
4367 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004368 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004369 copy = contextTab[hent];
4370 while (1) {
4371 if (!copy) break;
4372 tl_assert(copy->magic == RCEC_MAGIC);
4373 stats__ctxt_tab_cmps++;
4374 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4375 copy = copy->next;
4376 }
4377
4378 if (copy) {
4379 tl_assert(copy != example);
4380 /* optimisation: if it's not at the head of its list, move 1
4381 step fwds, to make future searches cheaper */
4382 if (copy != contextTab[hent]) {
4383 move_RCEC_one_step_forward( &contextTab[hent], copy );
4384 }
4385 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004386 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004387 tl_assert(copy != example);
4388 *copy = *example;
4389 copy->next = contextTab[hent];
4390 contextTab[hent] = copy;
4391 stats__ctxt_tab_curr++;
4392 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4393 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4394 }
4395 return copy;
4396}
4397
4398static inline UWord ROLW ( UWord w, Int n )
4399{
4400 Int bpw = 8 * sizeof(UWord);
4401 w = (w << n) | (w >> (bpw-n));
4402 return w;
4403}
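
/* Why rotate-and-xor rather than plain xor when hashing the frames (as
   get_RCEC does below): the rotation makes the hash sensitive to frame
   order, so two stacks holding the same addresses in different positions
   almost always hash differently. A minimal sketch, not part of the
   build; 'sketch__rolw_order_demo' is hypothetical. */
#if 0
static void sketch__rolw_order_demo ( void )
{
   UWord fA[2] = { 0x1000, 0x2000 };  /* same frames ... */
   UWord fB[2] = { 0x2000, 0x1000 };  /* ... in the opposite order */
   UWord h1 = 0, h2 = 0;
   Word  i;
   for (i = 0; i < 2; i++) { h1 ^= fA[i]; h1 = ROLW(h1, 19); }
   for (i = 0; i < 2; i++) { h2 ^= fB[i]; h2 = ROLW(h2, 19); }
   /* Plain xor would collide here; the rotation separates them. */
   tl_assert(h1 != h2);
}
#endif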
4404
4405__attribute__((noinline))
4406static RCEC* get_RCEC ( Thr* thr )
4407{
4408 UWord hash, i;
4409 RCEC example;
4410 example.magic = RCEC_MAGIC;
4411 example.rc = 0;
4412 example.rcX = 0;
florian195623b2013-01-22 00:25:05 +00004413 example.next = NULL;
njn6c83d5e2009-05-05 23:46:24 +00004414 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004415 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004416 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004417 hash ^= example.frames[i];
4418 hash = ROLW(hash, 19);
4419 }
njn6c83d5e2009-05-05 23:46:24 +00004420 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004421 return ctxt__find_or_add( &example );
4422}
4423
4424///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004425//// Part (2):
philippe328d6622015-05-25 17:24:27 +00004426/// A hashtable guest-addr -> OldRef, that refers to (1)
4427/// Note: we use the guest address as key. This means that the entries
4428/// for multiple threads accessing the same address will land in the same
4429/// bucket. It might be nice to get a better distribution of the
4430/// OldRefs in the hashtable by using as key the guest address ^ tsw.
4431/// The problem is that when a race is reported on a ga, we need to
4432/// efficiently retrieve the accesses to ga by other threads, using only the ga.
4433/// Measurements on firefox have shown that the chain length is reasonable.
sewardjf98e1c02008-10-25 16:22:41 +00004434
sewardjffce8152011-06-24 10:09:41 +00004435/* Records an access: a thread, a context (stack trace), a size &
philippe328d6622015-05-25 17:24:27 +00004436 writeness, and the set of write-locks held. The size (1,2,4,8) is
4437 stored as-is in szB. Note that szB uses more bits than needed to
4438 store a size up to 8. This allows a TSW to be used as a fully
4439 initialised UInt, e.g. in cmp_oldref_tsw. If needed, a more compact
4440 representation of szB is possible (e.g. use only 4 bits, or use only
4441 2 bits and encode the size (1,2,4,8) as 00 = 1, 01 = 2, 10 = 4, 11 = 8). */
4442typedef
4443 struct {
sewardjffce8152011-06-24 10:09:41 +00004444 UInt thrid : SCALARTS_N_THRBITS;
philippe328d6622015-05-25 17:24:27 +00004445 UInt szB : 32 - SCALARTS_N_THRBITS - 1;
sewardjffce8152011-06-24 10:09:41 +00004446 UInt isW : 1;
philippe328d6622015-05-25 17:24:27 +00004447 } TSW; // Thread+Size+Writeness
4448typedef
4449 struct {
4450 TSW tsw;
4451 WordSetID locksHeldW;
4452 RCEC* rcec;
sewardjffce8152011-06-24 10:09:41 +00004453 }
4454 Thr_n_RCEC;
sewardjf98e1c02008-10-25 16:22:41 +00004455
sewardjf98e1c02008-10-25 16:22:41 +00004456typedef
philippecabdbb52015-04-20 21:33:16 +00004457 struct OldRef {
philippe328d6622015-05-25 17:24:27 +00004458 struct OldRef *ht_next; // to link hash table nodes together.
4459 UWord ga; // hash_table key, == address for which we record an access.
philippecabdbb52015-04-20 21:33:16 +00004460 struct OldRef *prev; // to refs older than this one
4461 struct OldRef *next; // to refs newer than this one
philippe328d6622015-05-25 17:24:27 +00004462 UWord stamp; // allows ordering (by time of access) of 2 OldRefs
4463 Thr_n_RCEC acc;
sewardjf98e1c02008-10-25 16:22:41 +00004464 }
4465 OldRef;
philippe328d6622015-05-25 17:24:27 +00004466
4467/* Returns the or->tsw as an UInt */
4468static inline UInt oldref_tsw (const OldRef* or)
4469{
4470 return *(const UInt*)(&or->acc.tsw);
4471}
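
/* The cast in oldref_tsw relies on TSW's three bitfields covering a UInt
   exactly: SCALARTS_N_THRBITS + (32 - SCALARTS_N_THRBITS - 1) + 1 == 32
   bits, with no padding. A sketch of that assumption (not part of the
   build; 'sketch__tsw_fits_uint' is hypothetical): */
#if 0
static void sketch__tsw_fits_uint ( void )
{
   TSW x = { .thrid = 1, .szB = 8, .isW = 1 };
   TSW y = { .thrid = 1, .szB = 8, .isW = 1 };
   tl_assert(sizeof(TSW) == sizeof(UInt));
   /* Equal fields give equal 32-bit images, which is what
      cmp_oldref_tsw depends on when comparing TSWs as plain UInts. */
   tl_assert(*(UInt*)&x == *(UInt*)&y);
}
#endif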
4472
4473/* Compare the tsw component of 2 OldRefs.
4474 Used for the OldRef hashtable (which has already verified equality of the
4475 'key' part). */
4476static Word cmp_oldref_tsw (const void* node1, const void* node2 )
4477{
4478 const UInt tsw1 = oldref_tsw(node1);
4479 const UInt tsw2 = oldref_tsw(node2);
4480
4481 if (tsw1 < tsw2) return -1;
4482 if (tsw1 > tsw2) return 1;
4483 return 0;
4484}
4485
sewardjd86e3a22008-12-03 11:39:37 +00004486
philippe6643e962012-01-17 21:16:30 +00004487//////////// BEGIN OldRef pool allocator
4488static PoolAlloc* oldref_pool_allocator;
philippecabdbb52015-04-20 21:33:16 +00004489// Note: we only allocate elements in this pool allocator; we never free them.
4490// We stop allocating elements at HG_(clo_conflict_cache_size).
philippe6643e962012-01-17 21:16:30 +00004491//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004492
philippecabdbb52015-04-20 21:33:16 +00004493static OldRef mru;
4494static OldRef lru;
4495// A doubly linked list, chaining all OldRefs in MRU/LRU order.
4496// mru/lru are sentinel nodes.
4497// Whenever an oldref is re-used, it is moved to the most-recently-used
4498// position (i.e. pointed to by mru.prev).
4499// When a new oldref is needed, it is allocated from the pool
4500// if we have not yet reached --conflict-cache-size.
4501// Otherwise, if all oldrefs have already been allocated,
4502// the least recently used (i.e. pointed to by lru.next) is re-used.
4503// When an OldRef is used, it is moved to the most-recently-used position
4504// (i.e. pointed to by mru.prev).  (A usage sketch follows OldRef_newest below.)
4505
4506// Removes r from the double linked list
4507// Note: we do not need to test for special cases such as
4508// NULL next or prev pointers, because we have sentinel nodes
4509// at both sides of the list. So, a node is always forward and
4510// backward linked.
4511static inline void OldRef_unchain(OldRef *r)
4512{
4513 r->next->prev = r->prev;
4514 r->prev->next = r->next;
4515}
4516
4517// Insert new as the newest OldRef
4518// Similarly to OldRef_unchain, no need to test for NULL
4519// pointers, as e.g. mru.prev is always guaranteed to point
4520// to a non NULL node (lru when the list is empty).
4521static inline void OldRef_newest(OldRef *new)
4522{
4523 new->next = &mru;
4524 new->prev = mru.prev;
4525 mru.prev = new;
4526 new->prev->next = new;
4527}
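
/* Usage sketch for the recency list (not part of the build;
   'sketch__recency_demo' is hypothetical and assumes the mru/lru
   sentinels have been linked up, as event_map_init does below). */
#if 0
static void sketch__recency_demo ( OldRef* r )
{
   OldRef_newest(r);          /* insert: r becomes the MRU entry */
   tl_assert(mru.prev == r);
   OldRef_unchain(r);         /* touch: unhook ... */
   OldRef_newest(r);          /* ... and re-append at the MRU end */
   tl_assert(mru.prev == r);
   /* When the cache is full, the eviction victim is always lru.next. */
}
#endif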
sewardjd86e3a22008-12-03 11:39:37 +00004528
philippe328d6622015-05-25 17:24:27 +00004529
4530static VgHashTable* oldrefHT = NULL; /* Hash table* OldRef* */
4531static UWord oldrefHTN = 0; /* # elems in oldrefHT */
4532/* Note: the nr of ref in the oldrefHT will always be equal to
philippecabdbb52015-04-20 21:33:16 +00004533 the nr of elements that were allocated from the OldRef pool allocator
4534 as we never free an OldRef : we just re-use them. */
4535
4536
4537/* Allocates a new OldRef, or re-uses the lru one if all allowed OldRefs
4538 have already been allocated. */
4539static OldRef* alloc_or_reuse_OldRef ( void )
4540{
philippe328d6622015-05-25 17:24:27 +00004541 if (oldrefHTN < HG_(clo_conflict_cache_size)) {
4542 oldrefHTN++;
philippecabdbb52015-04-20 21:33:16 +00004543 return VG_(allocEltPA) ( oldref_pool_allocator );
4544 } else {
philippe328d6622015-05-25 17:24:27 +00004545 OldRef *oldref_ht;
philippecabdbb52015-04-20 21:33:16 +00004546 OldRef *oldref = lru.next;
4547
4548 OldRef_unchain(oldref);
philippe328d6622015-05-25 17:24:27 +00004549 oldref_ht = VG_(HT_gen_remove) (oldrefHT, oldref, cmp_oldref_tsw);
4550 tl_assert (oldref == oldref_ht);
4551 ctxt__rcdec( oldref->acc.rcec );
philippecabdbb52015-04-20 21:33:16 +00004552 return oldref;
4553 }
4554}
4555
sewardjf98e1c02008-10-25 16:22:41 +00004556
sewardj1669cc72008-12-13 01:20:21 +00004557inline static UInt min_UInt ( UInt a, UInt b ) {
4558 return a < b ? a : b;
4559}
4560
sewardja781be62008-12-08 00:12:28 +00004561/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4562 first interval is lower, 1 if the first interval is higher, and 0
4563 if there is any overlap. Redundant paranoia with casting is there
4564 following what looked distinctly like a bug in gcc-4.1.2, in which
4565 some of the comparisons were done signedly instead of
4566 unsignedly. */
4567/* Copied from exp-ptrcheck/sg_main.c */
philippe328d6622015-05-25 17:24:27 +00004568static inline Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4569 Addr a2, SizeT n2 ) {
sewardja781be62008-12-08 00:12:28 +00004570 UWord a1w = (UWord)a1;
4571 UWord n1w = (UWord)n1;
4572 UWord a2w = (UWord)a2;
4573 UWord n2w = (UWord)n2;
4574 tl_assert(n1w > 0 && n2w > 0);
4575 if (a1w + n1w <= a2w) return -1L;
4576 if (a2w + n2w <= a1w) return 1L;
4577 return 0;
4578}
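
/* Concrete behaviour of the interval comparison (a sketch, not part of
   the build; 'sketch__interval_demo' is hypothetical): */
#if 0
static void sketch__interval_demo ( void )
{
   /* Disjoint, first interval lower / higher: */
   tl_assert(cmp_nonempty_intervals(0x1000, 4, 0x1004, 4) == -1);
   tl_assert(cmp_nonempty_intervals(0x1004, 4, 0x1000, 4) ==  1);
   /* Any overlap at all yields 0; this is how accesses of different
      sizes to overlapping bytes are matched up. */
   tl_assert(cmp_nonempty_intervals(0x1000, 4, 0x1002, 1) ==  0);
}
#endif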
4579
philippe328d6622015-05-25 17:24:27 +00004580static UWord event_map_stamp = 0; // Used to stamp each OldRef when touched.
4581
sewardjc5ea9962008-12-07 01:41:46 +00004582static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004583{
philippe328d6622015-05-25 17:24:27 +00004584 OldRef example;
sewardjd86e3a22008-12-03 11:39:37 +00004585 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004586 RCEC* rcec;
sewardjf98e1c02008-10-25 16:22:41 +00004587
sewardjffce8152011-06-24 10:09:41 +00004588 tl_assert(thr);
4589 ThrID thrid = thr->thrid;
4590 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4591
4592 WordSetID locksHeldW = thr->hgthread->locksetW;
4593
sewardjc5ea9962008-12-07 01:41:46 +00004594 rcec = get_RCEC( thr );
sewardjc5ea9962008-12-07 01:41:46 +00004595
philippe328d6622015-05-25 17:24:27 +00004596 tl_assert (szB == 4 || szB == 8 || szB == 1 || szB == 2);
4597 // Check for the most frequent cases first
4598 // Note: we could support a szB up to 1 << (32 - SCALARTS_N_THRBITS - 1)
sewardjc5ea9962008-12-07 01:41:46 +00004599
philippe328d6622015-05-25 17:24:27 +00004600 /* Look in the oldrefHT to see if we already have a record for this
4601 address/thr/sz/isW. */
4602 example.ga = a;
4603 example.acc.tsw = (TSW) {.thrid = thrid,
4604 .szB = szB,
4605 .isW = (UInt)(isW & 1)};
4606 ref = VG_(HT_gen_lookup) (oldrefHT, &example, cmp_oldref_tsw);
sewardjf98e1c02008-10-25 16:22:41 +00004607
philippe328d6622015-05-25 17:24:27 +00004608 if (ref) {
4609 /* We already have a record for this address and this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004610 size) triple. */
philippecabdbb52015-04-20 21:33:16 +00004611 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004612
philippe328d6622015-05-25 17:24:27 +00004613 /* thread 'thr' has an entry. Update its RCEC, if it differs. */
4614 if (rcec == ref->acc.rcec)
4615 stats__ctxt_eq_tsw_eq_rcec++;
4616 else {
4617 stats__ctxt_eq_tsw_neq_rcec++;
4618 ctxt__rcdec( ref->acc.rcec );
4619 ctxt__rcinc(rcec);
4620 ref->acc.rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00004621 }
philippe328d6622015-05-25 17:24:27 +00004622 tl_assert(ref->acc.tsw.thrid == thrid);
4623 /* Update the stamp, RCEC and the W-held lockset. */
4624 ref->stamp = event_map_stamp;
4625 ref->acc.locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004626
philippecabdbb52015-04-20 21:33:16 +00004627 OldRef_unchain(ref);
4628 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004629
4630 } else {
philippe328d6622015-05-25 17:24:27 +00004631 /* We don't have a record for this address+triple. Create a new one. */
4632 stats__ctxt_neq_tsw_neq_rcec++;
philippecabdbb52015-04-20 21:33:16 +00004633 ref = alloc_or_reuse_OldRef();
4634 ref->ga = a;
philippe328d6622015-05-25 17:24:27 +00004635 ref->acc.tsw = (TSW) {.thrid = thrid,
4636 .szB = szB,
4637 .isW = (UInt)(isW & 1)};
4638 ref->stamp = event_map_stamp;
4639 ref->acc.locksHeldW = locksHeldW;
4640 ref->acc.rcec = rcec;
4641 ctxt__rcinc(rcec);
sewardjffce8152011-06-24 10:09:41 +00004642
philippe328d6622015-05-25 17:24:27 +00004643 VG_(HT_add_node) ( oldrefHT, ref );
philippecabdbb52015-04-20 21:33:16 +00004644 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004645 }
philippe328d6622015-05-25 17:24:27 +00004646 event_map_stamp++;
sewardjf98e1c02008-10-25 16:22:41 +00004647}
4648
4649
philippe328d6622015-05-25 17:24:27 +00004650/* Extract info from the conflicting-access machinery.
4651 Returns the most recent access conflicting with thr/[a, a+szB)/isW. */
sewardjc5ea9962008-12-07 01:41:46 +00004652Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004653 /*OUT*/Thr** resThr,
4654 /*OUT*/SizeT* resSzB,
4655 /*OUT*/Bool* resIsW,
4656 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004657 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004658{
sewardja781be62008-12-08 00:12:28 +00004659 Word i, j;
philippe328d6622015-05-25 17:24:27 +00004660 OldRef *ref = NULL;
4661 SizeT ref_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004662
philippe328d6622015-05-25 17:24:27 +00004663 OldRef *cand_ref;
4664 SizeT cand_ref_szB;
4665 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004666
4667 Addr toCheck[15];
4668 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004669
4670 tl_assert(thr);
4671 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004672
sewardjffce8152011-06-24 10:09:41 +00004673 ThrID thrid = thr->thrid;
4674
sewardja781be62008-12-08 00:12:28 +00004675 toCheck[nToCheck++] = a;
4676 for (i = -7; i < (Word)szB; i++) {
4677 if (i != 0)
4678 toCheck[nToCheck++] = a + i;
4679 }
4680 tl_assert(nToCheck <= 15);
4681
4682 /* Now see if we can find a suitable matching event for
4683 any of the addresses in toCheck[0 .. nToCheck-1]. */
4684 for (j = 0; j < nToCheck; j++) {
4685
4686 cand_a = toCheck[j];
4687 // VG_(printf)("test %ld %p\n", j, cand_a);
4688
philippe328d6622015-05-25 17:24:27 +00004689 /* Find the first HT element for this address.
4690 We might have several of these. They will be linked via ht_next.
4691 We must however check each element, as the chain contains
4692 all entries that map to the same bucket. */
4693 for (cand_ref = VG_(HT_lookup)( oldrefHT, cand_a );
4694 cand_ref; cand_ref = cand_ref->ht_next) {
4695 if (cand_ref->ga != cand_a)
4696 /* OldRef for another address in this HT bucket. Ignore. */
sewardjc5ea9962008-12-07 01:41:46 +00004697 continue;
4698
philippe328d6622015-05-25 17:24:27 +00004699 if (cand_ref->acc.tsw.thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004700 /* This is an access by the same thread, but we're only
4701 interested in accesses from other threads. Ignore. */
4702 continue;
4703
philippe328d6622015-05-25 17:24:27 +00004704 if ((!cand_ref->acc.tsw.isW) && (!isW))
sewardjc5ea9962008-12-07 01:41:46 +00004705 /* We don't want to report a read racing against another
4706 read; that's stupid. So in this case move on. */
4707 continue;
4708
philippe328d6622015-05-25 17:24:27 +00004709 cand_ref_szB = cand_ref->acc.tsw.szB;
4710 if (cmp_nonempty_intervals(a, szB, cand_a, cand_ref_szB) != 0)
sewardja781be62008-12-08 00:12:28 +00004711 /* No overlap with the access we're asking about. Ignore. */
4712 continue;
4713
philippe328d6622015-05-25 17:24:27 +00004714 /* We have a match. Keep this match if it is newer than
4715 the previous match. Note that stamps are unsigned words, and for
4716 long-running applications event_map_stamp might have wrapped around.
4717 So, 'roll' each stamp using event_map_stamp to restore the true
4718 order even after a wrap (a worked sketch follows this function). */
4719 if (!ref
4720 || (ref->stamp - event_map_stamp)
4721 < (cand_ref->stamp - event_map_stamp)) {
4722 ref = cand_ref;
4723 ref_szB = cand_ref_szB;
4724 }
sewardjc5ea9962008-12-07 01:41:46 +00004725 }
4726
philippe328d6622015-05-25 17:24:27 +00004727 if (ref) {
sewardja781be62008-12-08 00:12:28 +00004728 /* return with success */
philippe328d6622015-05-25 17:24:27 +00004729 Int n, maxNFrames;
4730 RCEC* ref_rcec = ref->acc.rcec;
4731 tl_assert(ref->acc.tsw.thrid);
4732 tl_assert(ref_rcec);
4733 tl_assert(ref_rcec->magic == RCEC_MAGIC);
4734 tl_assert(ref_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004735 /* Count how many non-zero frames we have. */
4736 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4737 for (n = 0; n < maxNFrames; n++) {
philippe328d6622015-05-25 17:24:27 +00004738 if (0 == ref_rcec->frames[n]) break;
njn3a4b58f2009-05-07 23:08:10 +00004739 }
philippe328d6622015-05-25 17:24:27 +00004740 *resEC = VG_(make_ExeContext_from_StackTrace)(ref_rcec->frames,
4741 n);
4742 *resThr = Thr__from_ThrID(ref->acc.tsw.thrid);
4743 *resSzB = ref_szB;
4744 *resIsW = ref->acc.tsw.isW;
4745 *locksHeldW = ref->acc.locksHeldW;
philippea4b20c02015-05-23 12:25:22 +00004746 stats__evm__lookup_found++;
sewardja781be62008-12-08 00:12:28 +00004747 return True;
4748 }
sewardjc5ea9962008-12-07 01:41:46 +00004749
sewardja781be62008-12-08 00:12:28 +00004750 /* consider next address in toCheck[] */
4751 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004752
sewardja781be62008-12-08 00:12:28 +00004753 /* really didn't find anything. */
philippea4b20c02015-05-23 12:25:22 +00004754 stats__evm__lookup_notfound++;
sewardja781be62008-12-08 00:12:28 +00004755 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004756}
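
/* Worked sketch of the 'rolled' stamp comparison used above (not part
   of the build; 'sketch__stamp_roll_demo' is hypothetical). Rebasing
   both stamps by the current event_map_stamp makes more recent stamps
   compare larger even when the counter has wrapped around. */
#if 0
static void sketch__stamp_roll_demo ( void )
{
   UWord now   = 5;            /* current event_map_stamp, just wrapped */
   UWord older = (UWord)-10;   /* stamped 15 ticks ago, pre-wrap */
   UWord newer = 2;            /* stamped 3 ticks ago, post-wrap */
   tl_assert(older > newer);             /* raw comparison: wrong order */
   tl_assert(older - now < newer - now); /* rolled comparison: right order */
}
#endif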
4757
philippe328d6622015-05-25 17:24:27 +00004758
4759void libhb_event_map_access_history ( Addr a, SizeT szB, Access_t fn )
4760{
4761 OldRef *ref = lru.next;
4762 SizeT ref_szB;
4763 Int n;
4764
4765 while (ref != &mru) {
4766 ref_szB = ref->acc.tsw.szB;
4767 if (cmp_nonempty_intervals(a, szB, ref->ga, ref_szB) == 0) {
4768 RCEC* ref_rcec = ref->acc.rcec;
4769 for (n = 0; n < N_FRAMES; n++) {
4770 if (0 == ref_rcec->frames[n]) {
4771 break;
4772 }
4773 }
4774 (*fn)(ref_rcec->frames, n,
4775 Thr__from_ThrID(ref->acc.tsw.thrid),
4776 ref->ga,
4777 ref_szB,
4778 ref->acc.tsw.isW,
4779 ref->acc.locksHeldW);
4780 }
4781 tl_assert (ref->next == &mru
4782 || ((ref->stamp - event_map_stamp)
4783 < ref->next->stamp - event_map_stamp));
4784 ref = ref->next;
4785 }
4786}
4787
sewardjf98e1c02008-10-25 16:22:41 +00004788static void event_map_init ( void )
4789{
4790 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004791
philippe6643e962012-01-17 21:16:30 +00004792 /* Context (RCEC) pool allocator */
4793 rcec_pool_allocator = VG_(newPA) (
4794 sizeof(RCEC),
4795 1000 /* RCECs per pool */,
4796 HG_(zalloc),
4797 "libhb.event_map_init.1 (RCEC pools)",
4798 HG_(free)
4799 );
sewardjd86e3a22008-12-03 11:39:37 +00004800
4801 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004802 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004803 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004804 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004805 for (i = 0; i < N_RCEC_TAB; i++)
4806 contextTab[i] = NULL;
4807
philippe6643e962012-01-17 21:16:30 +00004808 /* Oldref pool allocator */
4809 oldref_pool_allocator = VG_(newPA)(
4810 sizeof(OldRef),
4811 1000 /* OldRefs per pool */,
4812 HG_(zalloc),
4813 "libhb.event_map_init.3 (OldRef pools)",
4814 HG_(free)
4815 );
sewardjd86e3a22008-12-03 11:39:37 +00004816
philippe328d6622015-05-25 17:24:27 +00004817 /* Oldref hashtable */
4818 tl_assert(!oldrefHT);
4819 oldrefHT = VG_(HT_construct) ("libhb.event_map_init.4 (oldref hashtable)");
sewardjf98e1c02008-10-25 16:22:41 +00004820
philippe328d6622015-05-25 17:24:27 +00004821 oldrefHTN = 0;
philippecabdbb52015-04-20 21:33:16 +00004822 mru.prev = &lru;
4823 mru.next = NULL;
4824 lru.prev = NULL;
4825 lru.next = &mru;
philippe328d6622015-05-25 17:24:27 +00004826 mru.acc = (Thr_n_RCEC) {.tsw = {.thrid = 0,
4827 .szB = 0,
4828 .isW = 0},
4829 .locksHeldW = 0,
4830 .rcec = NULL};
4831 lru.acc = mru.acc;
sewardjf98e1c02008-10-25 16:22:41 +00004832}
4833
philippecabdbb52015-04-20 21:33:16 +00004834static void event_map__check_reference_counts ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004835{
4836 RCEC* rcec;
4837 OldRef* oldref;
4838 Word i;
4839 UWord nEnts = 0;
4840
4841 /* Set the 'check' reference counts to zero. Also, optionally
4842 check that the real reference counts are non-zero. We allow
4843 these to fall to zero before a GC, but the GC must get rid of
4844 all those that are zero, hence none should be zero after a
4845 GC. */
4846 for (i = 0; i < N_RCEC_TAB; i++) {
4847 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4848 nEnts++;
4849 tl_assert(rcec);
4850 tl_assert(rcec->magic == RCEC_MAGIC);
sewardjf98e1c02008-10-25 16:22:41 +00004851 rcec->rcX = 0;
4852 }
4853 }
4854
4855 /* check that the stats are sane */
4856 tl_assert(nEnts == stats__ctxt_tab_curr);
4857 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4858
4859 /* visit all the referencing points, inc check ref counts */
philippe328d6622015-05-25 17:24:27 +00004860 VG_(HT_ResetIter)( oldrefHT );
4861 oldref = VG_(HT_Next)( oldrefHT );
4862 while (oldref) {
4863 tl_assert (oldref->acc.tsw.thrid);
4864 tl_assert (oldref->acc.rcec);
4865 tl_assert (oldref->acc.rcec->magic == RCEC_MAGIC);
4866 oldref->acc.rcec->rcX++;
4867 oldref = VG_(HT_Next)( oldrefHT );
sewardjf98e1c02008-10-25 16:22:41 +00004868 }
4869
4870 /* compare check ref counts with actual */
4871 for (i = 0; i < N_RCEC_TAB; i++) {
4872 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4873 tl_assert(rcec->rc == rcec->rcX);
4874 }
4875 }
4876}
4877
sewardj8fd92d32008-11-20 23:17:01 +00004878__attribute__((noinline))
philippecabdbb52015-04-20 21:33:16 +00004879static void do_RCEC_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004880{
philippecabdbb52015-04-20 21:33:16 +00004881 UInt i;
sewardjf98e1c02008-10-25 16:22:41 +00004882
philippecabdbb52015-04-20 21:33:16 +00004883 if (VG_(clo_stats)) {
4884 static UInt ctr = 1;
4885 VG_(message)(Vg_DebugMsg,
4886 "libhb: RCEC GC: #%u %lu slots,"
4887 " %lu cur ents(ref'd %lu),"
4888 " %lu max ents\n",
4889 ctr++,
4890 (UWord)N_RCEC_TAB,
4891 stats__ctxt_tab_curr, RCEC_referenced,
4892 stats__ctxt_tab_max );
sewardjf98e1c02008-10-25 16:22:41 +00004893 }
philippecabdbb52015-04-20 21:33:16 +00004894 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004895
4896 /* Throw away all RCECs with zero reference counts */
4897 for (i = 0; i < N_RCEC_TAB; i++) {
4898 RCEC** pp = &contextTab[i];
4899 RCEC* p = *pp;
4900 while (p) {
4901 if (p->rc == 0) {
4902 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004903 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004904 p = *pp;
4905 tl_assert(stats__ctxt_tab_curr > 0);
philippe328d6622015-05-25 17:24:27 +00004906 stats__ctxt_rcec_gc_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004907 stats__ctxt_tab_curr--;
4908 } else {
4909 pp = &p->next;
4910 p = p->next;
4911 }
4912 }
4913 }
4914
philippecabdbb52015-04-20 21:33:16 +00004915 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004916}
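
/* The sweep above uses the pointer-to-pointer deletion idiom: 'pp'
   always holds the address of the link that points at 'p', so unlinking
   needs no special case for the list head. Isolated sketch (not part of
   the build; 'sketch__pp_unlink' is hypothetical): */
#if 0
static void sketch__pp_unlink ( RCEC** headp )
{
   RCEC** pp = headp;
   RCEC*  p  = *pp;
   while (p) {
      if (p->rc == 0) {
         *pp = p->next;    /* splice p out; pp itself stays put */
         free_RCEC(p);
         p = *pp;
      } else {
         pp = &p->next;    /* advance the link cursor ... */
         p  = p->next;     /* ... and the node cursor */
      }
   }
}
#endif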
4917
sewardjf98e1c02008-10-25 16:22:41 +00004918/////////////////////////////////////////////////////////
4919// //
4920// Core MSM //
4921// //
4922/////////////////////////////////////////////////////////
4923
sewardj23f12002009-07-24 08:45:08 +00004924/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4925 Nov 08, and again after [...],
4926 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004927
sewardj23f12002009-07-24 08:45:08 +00004928static ULong stats__msmcread = 0;
4929static ULong stats__msmcread_change = 0;
4930static ULong stats__msmcwrite = 0;
4931static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004932
sewardj8ab2c132009-08-02 09:34:35 +00004933/* Some notes on the H1 history mechanism:
4934
4935 Transition rules are:
4936
4937 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4938 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4939
4940 After any access by a thread T to a location L, L's constraint pair
4941 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4942
4943 After a race by thread T conflicting with some previous access by
4944 some other thread U, for a location with constraint (before
4945 processing the later access) (Cr,Cw), then Cw[U] is the segment in
4946 which the previous access lies.
4947
4948 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4949 are compared so as to find out which thread(s) this access
4950 conflicts with. Once that is established, we also require the
4951 pre-update Cw for the location, so we can index into it for those
4952 threads, to get the scalar clock values for the point at which the
4953 former accesses were made. (In fact we only bother to do any of
4954 this for an arbitrarily chosen one of the conflicting threads, as
4955 that's simpler, it avoids flooding the user with vast amounts of
4956 mostly useless information, and because the program is wrong if it
4957 contains any races at all -- so we don't really need to show all
4958 conflicting access pairs initially, so long as we only show none if
4959 none exist).
4960
4961 ---
4962
4963 That requires the auxiliary proof that
4964
4965 (Cr `join` Kw)[T] == Kw[T]
4966
4967 Why should that be true? Because for any thread T, Kw[T] >= the
4968 scalar clock value for T known by any other thread. In other
4969 words, because T's value for its own scalar clock is at least as up
4970 to date as the value for it known by any other thread (that is true
4971 for both the R- and W- scalar clocks). Hence no other thread will
4972 be able to feed in a value for that element (indirectly via a
4973 constraint) which will exceed Kw[T], and hence the join cannot
4974 cause that particular element to advance.
4975*/
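
/* A small numeric instance of the auxiliary proof (a sketch, not part
   of the build; 'sketch__join_keeps_own_clock' is hypothetical, and
   element-wise max stands in for the real VTS join). */
#if 0
static void sketch__join_keeps_own_clock ( void )
{
   /* Thread T is index 0; T's own W-clock Kw knows itself at time 10. */
   UWord Kw[3] = { 10, 4, 7 };
   /* Any constraint Cr can only hold values for T that were learned
      (directly or indirectly) from T, hence Cr[0] <= 10. */
   UWord Cr[3] = { 8, 5, 9 };
   UWord jn[3];
   Word  i;
   for (i = 0; i < 3; i++)
      jn[i] = Cr[i] > Kw[i] ? Cr[i] : Kw[i];  /* element-wise max */
   /* (Cr `join` Kw)[T] == Kw[T]: the join cannot advance T's own entry. */
   tl_assert(jn[0] == Kw[0]);
}
#endif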
4976
sewardjf98e1c02008-10-25 16:22:41 +00004977__attribute__((noinline))
4978static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004979 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004980 VtsID Cfailed,
4981 VtsID Kfailed,
4982 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004983{
sewardjc5ea9962008-12-07 01:41:46 +00004984 /* Call here to report a race. We just hand it onwards to
4985 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004986 error is going to be collected, then, at history_level 2, that
4987 queries the conflicting-event map. The alternative would be to
4988 query it right here. But that causes a lot of pointless queries
4989 for errors which will shortly be discarded as duplicates, and
4990 can become a performance overhead; so we defer the query until
4991 we know the error is not a duplicate. */
4992
4993 /* Stacks for the bounds of the (or one of the) conflicting
4994 segment(s). These are only set at history_level 1. */
4995 ExeContext* hist1_seg_start = NULL;
4996 ExeContext* hist1_seg_end = NULL;
4997 Thread* hist1_conf_thr = NULL;
4998
4999 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00005000 tl_assert(acc_thr->hgthread);
5001 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00005002 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5003
5004 if (HG_(clo_history_level) == 1) {
5005 Bool found;
5006 Word firstIx, lastIx;
5007 ULong_n_EC key;
5008
5009 /* At history_level 1, we must round up the relevant stack-pair
5010 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00005011 deferring it is complex; we can't (easily) put Kfailed and
5012 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00005013 getting tied up in difficulties with VtsID reference
5014 counting. So just do it now. */
5015 Thr* confThr;
5016 ULong confTym = 0;
5017 /* Which thread are we in conflict with? There may be more than
5018 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5019 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00005020 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00005021 /* This must exist! since if it was NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00005022 conflict (semantics of return value of
5023 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5024 called us, just checked exactly this -- that there was in
5025 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00005026 tl_assert(confThr);
5027
5028 /* Get the scalar clock value that the conflicting thread
5029 introduced into the constraint. A careful examination of the
5030 base machine rules shows that this must be the same as the
5031 conflicting thread's scalar clock when it created this
5032 constraint. Hence we know the scalar clock of the
5033 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00005034 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00005035
5036 /* Using this scalar clock, index into the conflicting thread's
5037 collection of stack traces made each time its vector clock
5038 (hence its scalar clock) changed. This gives the stack
5039 traces at the start and end of the conflicting segment (well,
5040 as per comment just above, of one of the conflicting
5041 segments, if there are more than one). */
5042 key.ull = confTym;
5043 key.ec = NULL;
5044 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00005045 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00005046 firstIx = lastIx = 0;
5047 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00005048 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005049 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00005050 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00005051 );
sewardj8ab2c132009-08-02 09:34:35 +00005052 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
florian5e5cb002015-08-03 21:21:42 +00005053 "confTym %llu found %d (%ld,%ld)\n",
sewardj8ab2c132009-08-02 09:34:35 +00005054 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00005055 confThr, confTym, found, firstIx, lastIx);
5056 /* We can't indefinitely collect stack traces at VTS
5057 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00005058 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00005059 ones, which in turn means we might fail to find index value
5060 confTym in the array. */
5061 if (found) {
5062 ULong_n_EC *pair_start, *pair_end;
5063 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00005064 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00005065 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005066 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00005067 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00005068 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005069 lastIx+1 );
5070 /* from properties of VG_(lookupXA) and the comparison fn used: */
5071 tl_assert(pair_start->ull < pair_end->ull);
5072 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005073 /* Could do a bit better here. It may be that pair_end
5074 doesn't have a stack, but the following entries in the
5075 array have the same scalar Kw and do have a stack. So
5076 we should search a bit further along the array than
5077 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00005078 } else {
sewardjffce8152011-06-24 10:09:41 +00005079 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00005080 hist1_seg_end = main_get_EC( confThr );
5081 }
5082 // seg_start could be NULL iff this is the first stack in the thread
5083 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5084 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00005085 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00005086 }
5087 }
5088
sewardj60626642011-03-10 15:14:37 +00005089 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00005090 szB, isWrite,
5091 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00005092}
5093
5094static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00005095 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00005096 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00005097 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5098 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00005099}
5100
5101
5102/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00005103static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005104 /* The following are only needed for
5105 creating error reports. */
5106 Thr* acc_thr,
5107 Addr acc_addr, SizeT szB )
5108{
5109 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005110 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00005111
5112 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005113 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005114 tl_assert(is_sane_SVal_C(svOld));
5115 }
5116
sewardj1c0ce7a2009-07-01 08:10:49 +00005117 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005118 VtsID tviR = acc_thr->viR;
5119 VtsID tviW = acc_thr->viW;
5120 VtsID rmini = SVal__unC_Rmin(svOld);
5121 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005122 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5123 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005124 /* no race */
5125 /* Note: RWLOCK subtlety: use tviW, not tviR */
5126 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5127 goto out;
5128 } else {
sewardjb0e009d2008-11-19 16:35:15 +00005129 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005130 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5131 tl_assert(leqxx);
5132 // same as in non-race case
5133 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5134 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005135 rmini, /* Cfailed */
5136 tviR, /* Kfailed */
5137 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005138 goto out;
5139 }
5140 }
5141 if (SVal__isA(svOld)) {
5142 /* reading no-access memory (sigh); leave unchanged */
5143 /* check for no pollution */
5144 tl_assert(svOld == SVal_NOACCESS);
5145 svNew = SVal_NOACCESS;
5146 goto out;
5147 }
sewardj23f12002009-07-24 08:45:08 +00005148 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005149 tl_assert(0);
5150
5151 out:
sewardj8f5374e2008-12-07 11:40:17 +00005152 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005153 tl_assert(is_sane_SVal_C(svNew));
5154 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005155 if (UNLIKELY(svNew != svOld)) {
5156 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005157 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005158 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005159 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005160 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005161 }
5162 }
5163 return svNew;
5164}
5165
5166
5167/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005168static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005169 /* The following are only needed for
5170 creating error reports. */
5171 Thr* acc_thr,
5172 Addr acc_addr, SizeT szB )
5173{
5174 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005175 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005176
5177 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005178 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005179 tl_assert(is_sane_SVal_C(svOld));
5180 }
5181
sewardj1c0ce7a2009-07-01 08:10:49 +00005182 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005183 VtsID tviW = acc_thr->viW;
5184 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005185 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5186 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005187 /* no race */
5188 svNew = SVal__mkC( tviW, tviW );
5189 goto out;
5190 } else {
5191 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005192 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005193 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5194 tl_assert(leqxx);
5195 // same as in non-race case
5196 // proof: in the non-race case, we have
5197 // rmini <= wmini (invar on constraints)
5198 // tviW <= tviR (invar on thread clocks)
5199 // wmini <= tviW (from run-time check)
5200 // hence from transitivity of <= we have
5201 // rmini <= wmini <= tviW
5202 // and so join(rmini,tviW) == tviW
5203 // and join(wmini,tviW) == tviW
5204 // qed.
5205 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5206 VtsID__join2(wmini, tviW) );
5207 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005208 wmini, /* Cfailed */
5209 tviW, /* Kfailed */
5210 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005211 goto out;
5212 }
5213 }
5214 if (SVal__isA(svOld)) {
5215 /* writing no-access memory (sigh); leave unchanged */
5216 /* check for no pollution */
5217 tl_assert(svOld == SVal_NOACCESS);
5218 svNew = SVal_NOACCESS;
5219 goto out;
5220 }
sewardj23f12002009-07-24 08:45:08 +00005221 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005222 tl_assert(0);
5223
5224 out:
sewardj8f5374e2008-12-07 11:40:17 +00005225 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005226 tl_assert(is_sane_SVal_C(svNew));
5227 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005228 if (UNLIKELY(svNew != svOld)) {
5229 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005230 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005231 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005232 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005233 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005234 }
5235 }
5236 return svNew;
5237}
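
/* Numeric instance of the join-collapse argument in the comments above
   (a sketch, not part of the build; 'sketch__join_collapse' is
   hypothetical, with scalars standing in for VTS values at one index). */
#if 0
static void sketch__join_collapse ( void )
{
   /* Non-race write: rmini <= wmini (constraint invariant) and
      wmini <= tviW (the run-time check), at every clock index. */
   UWord rmini = 3, wmini = 5, tviW = 9;
   UWord j1 = rmini > tviW ? rmini : tviW;   /* join(rmini, tviW) */
   UWord j2 = wmini > tviW ? wmini : tviW;   /* join(wmini, tviW) */
   /* Both joins collapse to tviW, which is why the non-race path can
      just write SVal__mkC(tviW, tviW). */
   tl_assert(j1 == tviW && j2 == tviW);
}
#endif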
5238
5239
5240/////////////////////////////////////////////////////////
5241// //
5242// Apply core MSM to specific memory locations //
5243// //
5244/////////////////////////////////////////////////////////
5245
sewardj23f12002009-07-24 08:45:08 +00005246/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005247
sewardj23f12002009-07-24 08:45:08 +00005248static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005249 CacheLine* cl;
5250 UWord cloff, tno, toff;
5251 SVal svOld, svNew;
5252 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005253 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005254 cl = get_cacheline(a);
5255 cloff = get_cacheline_offset(a);
5256 tno = get_treeno(a);
5257 toff = get_tree_offset(a); /* == 0 .. 7 */
5258 descr = cl->descrs[tno];
5259 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5260 SVal* tree = &cl->svals[tno << 3];
5261 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005262 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005263 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5264 }
5265 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005266 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005267 if (CHECK_ZSM)
5268 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005269 cl->svals[cloff] = svNew;
5270}
5271
sewardj23f12002009-07-24 08:45:08 +00005272static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005273 CacheLine* cl;
5274 UWord cloff, tno, toff;
5275 SVal svOld, svNew;
5276 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005277 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005278 cl = get_cacheline(a);
5279 cloff = get_cacheline_offset(a);
5280 tno = get_treeno(a);
5281 toff = get_tree_offset(a); /* == 0 .. 7 */
5282 descr = cl->descrs[tno];
5283 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5284 SVal* tree = &cl->svals[tno << 3];
5285 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005286 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005287 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5288 }
5289 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005290 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005291 if (CHECK_ZSM)
5292 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005293 cl->svals[cloff] = svNew;
5294}
5295
sewardj23f12002009-07-24 08:45:08 +00005296/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005297
sewardj23f12002009-07-24 08:45:08 +00005298static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005299 CacheLine* cl;
5300 UWord cloff, tno, toff;
5301 SVal svOld, svNew;
5302 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005303 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005304 if (UNLIKELY(!aligned16(a))) goto slowcase;
5305 cl = get_cacheline(a);
5306 cloff = get_cacheline_offset(a);
5307 tno = get_treeno(a);
5308 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5309 descr = cl->descrs[tno];
5310 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5311 if (valid_value_is_below_me_16(descr, toff)) {
5312 goto slowcase;
5313 } else {
5314 SVal* tree = &cl->svals[tno << 3];
5315 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5316 }
sewardj8f5374e2008-12-07 11:40:17 +00005317 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005318 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5319 }
5320 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005321 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005322 if (CHECK_ZSM)
5323 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005324 cl->svals[cloff] = svNew;
5325 return;
5326 slowcase: /* misaligned, or must go further down the tree */
5327 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005328 zsm_sapply08__msmcread( thr, a + 0 );
5329 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005330}
5331
sewardj23f12002009-07-24 08:45:08 +00005332static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005333 CacheLine* cl;
5334 UWord cloff, tno, toff;
5335 SVal svOld, svNew;
5336 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005337 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005338 if (UNLIKELY(!aligned16(a))) goto slowcase;
5339 cl = get_cacheline(a);
5340 cloff = get_cacheline_offset(a);
5341 tno = get_treeno(a);
5342 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5343 descr = cl->descrs[tno];
5344 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5345 if (valid_value_is_below_me_16(descr, toff)) {
5346 goto slowcase;
5347 } else {
5348 SVal* tree = &cl->svals[tno << 3];
5349 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5350 }
sewardj8f5374e2008-12-07 11:40:17 +00005351 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005352 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5353 }
5354 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005355 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005356 if (CHECK_ZSM)
5357 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005358 cl->svals[cloff] = svNew;
5359 return;
5360 slowcase: /* misaligned, or must go further down the tree */
5361 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005362 zsm_sapply08__msmcwrite( thr, a + 0 );
5363 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005364}
5365
sewardj23f12002009-07-24 08:45:08 +00005366/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005367
sewardj23f12002009-07-24 08:45:08 +00005368static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005369 CacheLine* cl;
5370 UWord cloff, tno, toff;
5371 SVal svOld, svNew;
5372 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005373 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005374 if (UNLIKELY(!aligned32(a))) goto slowcase;
5375 cl = get_cacheline(a);
5376 cloff = get_cacheline_offset(a);
5377 tno = get_treeno(a);
5378 toff = get_tree_offset(a); /* == 0 or 4 */
5379 descr = cl->descrs[tno];
5380 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5381 if (valid_value_is_above_me_32(descr, toff)) {
5382 SVal* tree = &cl->svals[tno << 3];
5383 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5384 } else {
5385 goto slowcase;
5386 }
sewardj8f5374e2008-12-07 11:40:17 +00005387 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005388 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5389 }
5390 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005391 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005392 if (CHECK_ZSM)
5393 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005394 cl->svals[cloff] = svNew;
5395 return;
5396 slowcase: /* misaligned, or must go further down the tree */
5397 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005398 zsm_sapply16__msmcread( thr, a + 0 );
5399 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005400}
5401
sewardj23f12002009-07-24 08:45:08 +00005402static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005403 CacheLine* cl;
5404 UWord cloff, tno, toff;
5405 SVal svOld, svNew;
5406 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005407 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005408 if (UNLIKELY(!aligned32(a))) goto slowcase;
5409 cl = get_cacheline(a);
5410 cloff = get_cacheline_offset(a);
5411 tno = get_treeno(a);
5412 toff = get_tree_offset(a); /* == 0 or 4 */
5413 descr = cl->descrs[tno];
5414 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5415 if (valid_value_is_above_me_32(descr, toff)) {
5416 SVal* tree = &cl->svals[tno << 3];
5417 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5418 } else {
5419 goto slowcase;
5420 }
sewardj8f5374e2008-12-07 11:40:17 +00005421 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005422 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5423 }
5424 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005425 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005426 if (CHECK_ZSM)
5427 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005428 cl->svals[cloff] = svNew;
5429 return;
5430 slowcase: /* misaligned, or must go further down the tree */
5431 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005432 zsm_sapply16__msmcwrite( thr, a + 0 );
5433 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005434}
5435
sewardj23f12002009-07-24 08:45:08 +00005436/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005437
sewardj23f12002009-07-24 08:45:08 +00005438static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005439 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005440 UWord cloff, tno;
5441 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005442 SVal svOld, svNew;
5443 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005444 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005445 if (UNLIKELY(!aligned64(a))) goto slowcase;
5446 cl = get_cacheline(a);
5447 cloff = get_cacheline_offset(a);
5448 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005449 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005450 descr = cl->descrs[tno];
5451 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5452 goto slowcase;
5453 }
5454 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005455 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005456 if (CHECK_ZSM)
5457 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005458 cl->svals[cloff] = svNew;
5459 return;
5460 slowcase: /* misaligned, or must go further down the tree */
5461 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005462 zsm_sapply32__msmcread( thr, a + 0 );
5463 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005464}
5465
sewardj23f12002009-07-24 08:45:08 +00005466static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005467 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005468 UWord cloff, tno;
5469 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005470 SVal svOld, svNew;
5471 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005472 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005473 if (UNLIKELY(!aligned64(a))) goto slowcase;
5474 cl = get_cacheline(a);
5475 cloff = get_cacheline_offset(a);
5476 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005477 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005478 descr = cl->descrs[tno];
5479 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5480 goto slowcase;
5481 }
5482 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005483 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005484 if (CHECK_ZSM)
5485 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005486 cl->svals[cloff] = svNew;
5487 return;
5488 slowcase: /* misaligned, or must go further down the tree */
5489 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005490 zsm_sapply32__msmcwrite( thr, a + 0 );
5491 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005492}
5493
sewardj23f12002009-07-24 08:45:08 +00005494/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005495
5496static
sewardj23f12002009-07-24 08:45:08 +00005497void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005498 CacheLine* cl;
5499 UWord cloff, tno, toff;
5500 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005501 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005502 cl = get_cacheline(a);
5503 cloff = get_cacheline_offset(a);
5504 tno = get_treeno(a);
5505 toff = get_tree_offset(a); /* == 0 .. 7 */
5506 descr = cl->descrs[tno];
5507 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5508 SVal* tree = &cl->svals[tno << 3];
5509 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005510 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005511 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5512 }
5513 tl_assert(svNew != SVal_INVALID);
5514 cl->svals[cloff] = svNew;
5515}
5516
sewardj23f12002009-07-24 08:45:08 +00005517/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005518
5519static
sewardj23f12002009-07-24 08:45:08 +00005520void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005521 CacheLine* cl;
5522 UWord cloff, tno, toff;
5523 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005524 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005525 if (UNLIKELY(!aligned16(a))) goto slowcase;
5526 cl = get_cacheline(a);
5527 cloff = get_cacheline_offset(a);
5528 tno = get_treeno(a);
5529 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5530 descr = cl->descrs[tno];
5531 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5532 if (valid_value_is_below_me_16(descr, toff)) {
5533 /* Writing at this level. Need to fix up 'descr'. */
5534 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5535 /* At this point, the tree does not match cl->descr[tno] any
5536 more. The assignments below will fix it up. */
5537 } else {
5538 /* We can't indiscriminately write on the w16 node as in the
5539 w64 case, as that might make the node inconsistent with
5540 its parent. So first, pull down to this level. */
5541 SVal* tree = &cl->svals[tno << 3];
5542 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005543 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005544 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5545 }
5546 }
5547 tl_assert(svNew != SVal_INVALID);
5548 cl->svals[cloff + 0] = svNew;
5549 cl->svals[cloff + 1] = SVal_INVALID;
5550 return;
5551 slowcase: /* misaligned */
5552 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005553 zsm_swrite08( a + 0, svNew );
5554 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005555}
5556
sewardj23f12002009-07-24 08:45:08 +00005557/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005558
5559static
sewardj23f12002009-07-24 08:45:08 +00005560void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005561 CacheLine* cl;
5562 UWord cloff, tno, toff;
5563 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005564 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005565 if (UNLIKELY(!aligned32(a))) goto slowcase;
5566 cl = get_cacheline(a);
5567 cloff = get_cacheline_offset(a);
5568 tno = get_treeno(a);
5569 toff = get_tree_offset(a); /* == 0 or 4 */
5570 descr = cl->descrs[tno];
5571 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5572 if (valid_value_is_above_me_32(descr, toff)) {
5573 /* We can't indiscriminately write on the w32 node as in the
5574 w64 case, as that might make the node inconsistent with
5575 its parent. So first, pull down to this level. */
5576 SVal* tree = &cl->svals[tno << 3];
5577 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005578 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005579 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5580 } else {
5581 /* Writing at this level. Need to fix up 'descr'. */
5582 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5583 /* At this point, the tree does not match cl->descr[tno] any
5584 more. The assignments below will fix it up. */
5585 }
5586 }
5587 tl_assert(svNew != SVal_INVALID);
5588 cl->svals[cloff + 0] = svNew;
5589 cl->svals[cloff + 1] = SVal_INVALID;
5590 cl->svals[cloff + 2] = SVal_INVALID;
5591 cl->svals[cloff + 3] = SVal_INVALID;
5592 return;
5593 slowcase: /* misaligned */
5594 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005595 zsm_swrite16( a + 0, svNew );
5596 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005597}
5598
sewardj23f12002009-07-24 08:45:08 +00005599/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005600
5601static
sewardj23f12002009-07-24 08:45:08 +00005602void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005603 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005604 UWord cloff, tno;
5605 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005606 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005607 if (UNLIKELY(!aligned64(a))) goto slowcase;
5608 cl = get_cacheline(a);
5609 cloff = get_cacheline_offset(a);
5610 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005611 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005612 cl->descrs[tno] = TREE_DESCR_64;
5613 tl_assert(svNew != SVal_INVALID);
5614 cl->svals[cloff + 0] = svNew;
5615 cl->svals[cloff + 1] = SVal_INVALID;
5616 cl->svals[cloff + 2] = SVal_INVALID;
5617 cl->svals[cloff + 3] = SVal_INVALID;
5618 cl->svals[cloff + 4] = SVal_INVALID;
5619 cl->svals[cloff + 5] = SVal_INVALID;
5620 cl->svals[cloff + 6] = SVal_INVALID;
5621 cl->svals[cloff + 7] = SVal_INVALID;
5622 return;
5623 slowcase: /* misaligned */
5624 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005625 zsm_swrite32( a + 0, svNew );
5626 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005627}
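
/* Illustrative note: the swrite family forms a ladder.  A misaligned
   N-bit write splits into two N/2-bit writes (the slowcases above,
   counted by the stats__cline_*splits counters), so a misaligned
   64-bit write can degrade, at worst, into eight 8-bit writes.  The
   aligned 64-bit write is the easy case: it owns the whole tree, so
   it just sets the descriptor to TREE_DESCR_64 and marks the other
   seven leaves SVal_INVALID; the 16- and 32-bit cases must instead
   pull the descriptor up or down first, to keep the tree and its
   parent levels consistent. */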
5628
sewardj23f12002009-07-24 08:45:08 +00005629/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005630
5631static
sewardj23f12002009-07-24 08:45:08 +00005632SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005633 CacheLine* cl;
5634 UWord cloff, tno, toff;
5635 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005636 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005637 cl = get_cacheline(a);
5638 cloff = get_cacheline_offset(a);
5639 tno = get_treeno(a);
5640 toff = get_tree_offset(a); /* == 0 .. 7 */
5641 descr = cl->descrs[tno];
5642 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5643 SVal* tree = &cl->svals[tno << 3];
5644 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5645 }
5646 return cl->svals[cloff];
5647}
5648
sewardj23f12002009-07-24 08:45:08 +00005649static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005650 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005651 stats__cline_scopy08s++;
5652 sv = zsm_sread08( src );
5653 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005654}
5655
5656
sewardj23f12002009-07-24 08:45:08 +00005657/* Block-copy states (needed for implementing realloc()). Note this
5658 doesn't change the filtering arrangements. The caller of
5659 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005660
sewardj23f12002009-07-24 08:45:08 +00005661static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005662{
5663 SizeT i;
5664 if (len == 0)
5665 return;
5666
5667 /* assert for non-overlappingness */
5668 tl_assert(src+len <= dst || dst+len <= src);
5669
5670 /* To be simple, just copy byte by byte. But so as not to wreck
5671 performance for later accesses to dst[0 .. len-1], normalise
5672 destination lines as we finish with them, and also normalise the
5673      lines containing the first and last addresses. */
5674 for (i = 0; i < len; i++) {
5675 Bool normalise
5676 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5677 || i == 0 /* first in range */
5678 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005679 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005680 }
5681}
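
/* A minimal caller sketch (illustrative only; evh__realloc_copy and
   its parameters are hypothetical, not part of this file).  On a
   growing realloc, shadow state for the surviving prefix is
   block-copied to the new location, and, per the comment above, it is
   the caller's job to clear the filter for the destination range. */
#if 0
static void evh__realloc_copy ( Thr* thr, Addr oldA, Addr newA,
                                SizeT survivingB )
{
   zsm_scopy_range( oldA, newA, survivingB );  /* copy shadow values */
   Filter__clear_range( thr->filter, newA, survivingB ); /* caller's duty */
}
#endif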
5682
5683
5684/* For setting address ranges to a given value. Has considerable
5685 sophistication so as to avoid generating large numbers of pointless
5686 cache loads/writebacks for large ranges. */
5687
5688/* Do small ranges in-cache, in the obvious way. */
5689static
sewardj23f12002009-07-24 08:45:08 +00005690void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005691{
5692 /* fast track a couple of common cases */
5693 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005694 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005695 return;
5696 }
5697 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005698 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005699 return;
5700 }
5701
5702 /* be completely general (but as efficient as possible) */
5703 if (len == 0) return;
5704
5705 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005706 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005707 a += 1;
5708 len -= 1;
5709 tl_assert(aligned16(a));
5710 }
5711 if (len == 0) return;
5712
5713 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005714 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005715 a += 2;
5716 len -= 2;
5717 tl_assert(aligned32(a));
5718 }
5719 if (len == 0) return;
5720
5721 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005722 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005723 a += 4;
5724 len -= 4;
5725 tl_assert(aligned64(a));
5726 }
5727 if (len == 0) return;
5728
5729 if (len >= 8) {
5730 tl_assert(aligned64(a));
5731 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005732 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005733 a += 8;
5734 len -= 8;
5735 }
5736 tl_assert(aligned64(a));
5737 }
5738 if (len == 0) return;
5739
5740 if (len >= 4)
5741 tl_assert(aligned32(a));
5742 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005743 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005744 a += 4;
5745 len -= 4;
5746 }
5747 if (len == 0) return;
5748
5749 if (len >= 2)
5750 tl_assert(aligned16(a));
5751 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005752 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005753 a += 2;
5754 len -= 2;
5755 }
5756 if (len == 0) return;
5757
5758 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005759 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005760 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005761 len -= 1;
5762 }
5763 tl_assert(len == 0);
5764}
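
/* Worked example (illustrative): zsm_sset_range_SMALL(0x1003, 13, sv)
   issues zsm_swrite08 at 0x1003 (reaching 16-alignment; 0x1004 happens
   to be 32-aligned already), then zsm_swrite32 at 0x1004 (reaching
   64-alignment), then zsm_swrite64 at 0x1008, leaving len == 0.  Any
   small range thus needs at most a handful of writes. */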
5765
5766
sewardj23f12002009-07-24 08:45:08 +00005767/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005768 for larger ranges, try to operate directly on the out-of-cache
5769 representation, rather than dragging lines into the cache,
5770 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005771 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005772
sewardj23f12002009-07-24 08:45:08 +00005773 Note that this doesn't change the filtering arrangements. The
5774 caller of zsm_sset_range needs to attend to that. */
5775
5776static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005777{
5778 tl_assert(svNew != SVal_INVALID);
5779 stats__cache_make_New_arange += (ULong)len;
5780
5781 if (0 && len > 500)
florian5e5cb002015-08-03 21:21:42 +00005782 VG_(printf)("make New ( %#lx, %lu )\n", a, len );
sewardjf98e1c02008-10-25 16:22:41 +00005783
5784 if (0) {
5785 static UWord n_New_in_cache = 0;
5786 static UWord n_New_not_in_cache = 0;
5787 /* tag is 'a' with the in-line offset masked out,
5788 eg a[31]..a[4] 0000 */
5789 Addr tag = a & ~(N_LINE_ARANGE - 1);
5790 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5791 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5792 n_New_in_cache++;
5793 } else {
5794 n_New_not_in_cache++;
5795 }
5796 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5797 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5798 n_New_in_cache, n_New_not_in_cache );
5799 }
5800
5801 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005802 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005803 } else {
5804 Addr before_start = a;
5805 Addr aligned_start = cacheline_ROUNDUP(a);
5806 Addr after_start = cacheline_ROUNDDN(a + len);
5807 UWord before_len = aligned_start - before_start;
5808 UWord aligned_len = after_start - aligned_start;
5809 UWord after_len = a + len - after_start;
5810 tl_assert(before_start <= aligned_start);
5811 tl_assert(aligned_start <= after_start);
5812 tl_assert(before_len < N_LINE_ARANGE);
5813 tl_assert(after_len < N_LINE_ARANGE);
5814 tl_assert(get_cacheline_offset(aligned_start) == 0);
5815 if (get_cacheline_offset(a) == 0) {
5816 tl_assert(before_len == 0);
5817 tl_assert(a == aligned_start);
5818 }
5819 if (get_cacheline_offset(a+len) == 0) {
5820 tl_assert(after_len == 0);
5821 tl_assert(after_start == a+len);
5822 }
5823 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005824 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005825 }
5826 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005827 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005828 }
5829 stats__cache_make_New_inZrep += (ULong)aligned_len;
5830
5831 while (1) {
5832 Addr tag;
5833 UWord wix;
5834 if (aligned_start >= after_start)
5835 break;
5836 tl_assert(get_cacheline_offset(aligned_start) == 0);
5837 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5838 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5839 if (tag == cache_shmem.tags0[wix]) {
5840 UWord i;
5841 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005842 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005843 } else {
5844 UWord i;
5845 Word zix;
5846 SecMap* sm;
5847 LineZ* lineZ;
5848 /* This line is not in the cache. Do not force it in; instead
5849 modify it in-place. */
5850 /* find the Z line to write in and rcdec it or the
5851 associated F line. */
5852 find_Z_for_writing( &sm, &zix, tag );
5853 tl_assert(sm);
5854 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5855 lineZ = &sm->linesZ[zix];
5856 lineZ->dict[0] = svNew;
5857 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5858 for (i = 0; i < N_LINE_ARANGE/4; i++)
5859 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5860 rcinc_LineZ(lineZ);
5861 }
5862 aligned_start += N_LINE_ARANGE;
5863 aligned_len -= N_LINE_ARANGE;
5864 }
5865 tl_assert(aligned_start == after_start);
5866 tl_assert(aligned_len == 0);
5867 }
5868}
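
/* Worked example (illustrative, assuming N_LINE_ARANGE == 64): for
   zsm_sset_range(0x1010, 0x100, sv) we get aligned_start == 0x1040
   and after_start == 0x1100, so the 0x30-byte prefix and 0x10-byte
   suffix go via zsm_sset_range_SMALL, while the three whole lines in
   [0x1040, 0x1100) are either overwritten in-cache or written
   directly into the Z representation as one-entry dictionaries. */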
5869
5870
5871/////////////////////////////////////////////////////////
5872// //
sewardj23f12002009-07-24 08:45:08 +00005873// Front-filtering accesses //
5874// //
5875/////////////////////////////////////////////////////////
5876
5877static UWord stats__f_ac = 0;
5878static UWord stats__f_sk = 0;
5879
5880#if 0
5881# define STATS__F_SHOW \
5882 do { \
5883 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5884 VG_(printf)("filters: ac %lu sk %lu\n", \
5885 stats__f_ac, stats__f_sk); \
5886 } while (0)
5887#else
5888# define STATS__F_SHOW /* */
5889#endif
5890
5891void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5892 stats__f_ac++;
5893 STATS__F_SHOW;
5894 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5895 stats__f_sk++;
5896 return;
5897 }
5898 zsm_sapply08__msmcwrite(thr, a);
5899}
5900
5901void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5902 stats__f_ac++;
5903 STATS__F_SHOW;
5904 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5905 stats__f_sk++;
5906 return;
5907 }
5908 zsm_sapply16__msmcwrite(thr, a);
5909}
5910
5911void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5912 stats__f_ac++;
5913 STATS__F_SHOW;
5914 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5915 stats__f_sk++;
5916 return;
5917 }
5918 zsm_sapply32__msmcwrite(thr, a);
5919}
5920
5921void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5922 stats__f_ac++;
5923 STATS__F_SHOW;
5924 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5925 stats__f_sk++;
5926 return;
5927 }
5928 zsm_sapply64__msmcwrite(thr, a);
5929}
5930
5931void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5932{
5933 /* fast track a couple of common cases */
5934 if (len == 4 && aligned32(a)) {
5935 zsm_sapply32_f__msmcwrite( thr, a );
5936 return;
5937 }
5938 if (len == 8 && aligned64(a)) {
5939 zsm_sapply64_f__msmcwrite( thr, a );
5940 return;
5941 }
5942
5943 /* be completely general (but as efficient as possible) */
5944 if (len == 0) return;
5945
5946 if (!aligned16(a) && len >= 1) {
5947 zsm_sapply08_f__msmcwrite( thr, a );
5948 a += 1;
5949 len -= 1;
5950 tl_assert(aligned16(a));
5951 }
5952 if (len == 0) return;
5953
5954 if (!aligned32(a) && len >= 2) {
5955 zsm_sapply16_f__msmcwrite( thr, a );
5956 a += 2;
5957 len -= 2;
5958 tl_assert(aligned32(a));
5959 }
5960 if (len == 0) return;
5961
5962 if (!aligned64(a) && len >= 4) {
5963 zsm_sapply32_f__msmcwrite( thr, a );
5964 a += 4;
5965 len -= 4;
5966 tl_assert(aligned64(a));
5967 }
5968 if (len == 0) return;
5969
5970 if (len >= 8) {
5971 tl_assert(aligned64(a));
5972 while (len >= 8) {
5973 zsm_sapply64_f__msmcwrite( thr, a );
5974 a += 8;
5975 len -= 8;
5976 }
5977 tl_assert(aligned64(a));
5978 }
5979 if (len == 0) return;
5980
5981 if (len >= 4)
5982 tl_assert(aligned32(a));
5983 if (len >= 4) {
5984 zsm_sapply32_f__msmcwrite( thr, a );
5985 a += 4;
5986 len -= 4;
5987 }
5988 if (len == 0) return;
5989
5990 if (len >= 2)
5991 tl_assert(aligned16(a));
5992 if (len >= 2) {
5993 zsm_sapply16_f__msmcwrite( thr, a );
5994 a += 2;
5995 len -= 2;
5996 }
5997 if (len == 0) return;
5998
5999 if (len >= 1) {
6000 zsm_sapply08_f__msmcwrite( thr, a );
6001 //a += 1;
6002 len -= 1;
6003 }
6004 tl_assert(len == 0);
6005}
6006
6007void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6008 stats__f_ac++;
6009 STATS__F_SHOW;
6010 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6011 stats__f_sk++;
6012 return;
6013 }
6014 zsm_sapply08__msmcread(thr, a);
6015}
6016
6017void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6018 stats__f_ac++;
6019 STATS__F_SHOW;
6020 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6021 stats__f_sk++;
6022 return;
6023 }
6024 zsm_sapply16__msmcread(thr, a);
6025}
6026
6027void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6028 stats__f_ac++;
6029 STATS__F_SHOW;
6030 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6031 stats__f_sk++;
6032 return;
6033 }
6034 zsm_sapply32__msmcread(thr, a);
6035}
6036
6037void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6038 stats__f_ac++;
6039 STATS__F_SHOW;
6040 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6041 stats__f_sk++;
6042 return;
6043 }
6044 zsm_sapply64__msmcread(thr, a);
6045}
6046
6047void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6048{
6049 /* fast track a couple of common cases */
6050 if (len == 4 && aligned32(a)) {
6051 zsm_sapply32_f__msmcread( thr, a );
6052 return;
6053 }
6054 if (len == 8 && aligned64(a)) {
6055 zsm_sapply64_f__msmcread( thr, a );
6056 return;
6057 }
6058
6059 /* be completely general (but as efficient as possible) */
6060 if (len == 0) return;
6061
6062 if (!aligned16(a) && len >= 1) {
6063 zsm_sapply08_f__msmcread( thr, a );
6064 a += 1;
6065 len -= 1;
6066 tl_assert(aligned16(a));
6067 }
6068 if (len == 0) return;
6069
6070 if (!aligned32(a) && len >= 2) {
6071 zsm_sapply16_f__msmcread( thr, a );
6072 a += 2;
6073 len -= 2;
6074 tl_assert(aligned32(a));
6075 }
6076 if (len == 0) return;
6077
6078 if (!aligned64(a) && len >= 4) {
6079 zsm_sapply32_f__msmcread( thr, a );
6080 a += 4;
6081 len -= 4;
6082 tl_assert(aligned64(a));
6083 }
6084 if (len == 0) return;
6085
6086 if (len >= 8) {
6087 tl_assert(aligned64(a));
6088 while (len >= 8) {
6089 zsm_sapply64_f__msmcread( thr, a );
6090 a += 8;
6091 len -= 8;
6092 }
6093 tl_assert(aligned64(a));
6094 }
6095 if (len == 0) return;
6096
6097 if (len >= 4)
6098 tl_assert(aligned32(a));
6099 if (len >= 4) {
6100 zsm_sapply32_f__msmcread( thr, a );
6101 a += 4;
6102 len -= 4;
6103 }
6104 if (len == 0) return;
6105
6106 if (len >= 2)
6107 tl_assert(aligned16(a));
6108 if (len >= 2) {
6109 zsm_sapply16_f__msmcread( thr, a );
6110 a += 2;
6111 len -= 2;
6112 }
6113 if (len == 0) return;
6114
6115 if (len >= 1) {
6116 zsm_sapply08_f__msmcread( thr, a );
6117 //a += 1;
6118 len -= 1;
6119 }
6120 tl_assert(len == 0);
6121}
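
/* The eight zsm_sapply*_f__msm* wrappers above are deliberately
   uniform: bump stats__f_ac, consult the per-thread filter, and fall
   through to the unfiltered handler only on a filter miss.  A sketch
   of how the same pattern could be macro-generated (illustrative
   only; the real code spells the wrappers out longhand): */
#if 0
#  define MK_FILTERED_APPLY(sz, kind, skipfn)                      \
      void zsm_sapply##sz##_f__msmc##kind ( Thr* thr, Addr a ) {   \
         stats__f_ac++;                                            \
         STATS__F_SHOW;                                            \
         if (LIKELY(skipfn(thr->filter, a))) {                     \
            stats__f_sk++;                                         \
            return;                                                \
         }                                                         \
         zsm_sapply##sz##__msmc##kind(thr, a);                     \
      }
   MK_FILTERED_APPLY(08, read, Filter__ok_to_skip_crd08)
   /* ... and likewise for 16/32/64 and for the write variants ... */
#  undef MK_FILTERED_APPLY
#endif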
6122
6123void libhb_Thr_resumes ( Thr* thr )
6124{
6125 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006126 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006127 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00006128 Filter__clear(thr->filter, "libhb_Thr_resumes");
6129 /* A kludge, but .. if this thread doesn't have any marker stacks
6130 at all, get one right now. This is easier than figuring out
6131 exactly when at thread startup we can and can't take a stack
6132 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00006133 if (HG_(clo_history_level) == 1) {
6134 tl_assert(thr->local_Kws_n_stacks);
6135 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6136 note_local_Kw_n_stack_for(thr);
6137 }
sewardj23f12002009-07-24 08:45:08 +00006138}
6139
6140
6141/////////////////////////////////////////////////////////
6142// //
sewardjf98e1c02008-10-25 16:22:41 +00006143// Synchronisation objects //
6144// //
6145/////////////////////////////////////////////////////////
6146
sewardjffce8152011-06-24 10:09:41 +00006147/* A double linked list of all the SO's. */
6148SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006149
sewardjffce8152011-06-24 10:09:41 +00006150static SO* SO__Alloc ( void )
6151{
sewardjf98e1c02008-10-25 16:22:41 +00006152 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6153 so->viR = VtsID_INVALID;
6154 so->viW = VtsID_INVALID;
6155 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006156 /* Add to double linked list */
6157 if (admin_SO) {
6158 tl_assert(admin_SO->admin_prev == NULL);
6159 admin_SO->admin_prev = so;
6160 so->admin_next = admin_SO;
6161 } else {
6162 so->admin_next = NULL;
6163 }
6164 so->admin_prev = NULL;
6165 admin_SO = so;
6166 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006167 return so;
6168}
sewardjffce8152011-06-24 10:09:41 +00006169
6170static void SO__Dealloc ( SO* so )
6171{
sewardjf98e1c02008-10-25 16:22:41 +00006172 tl_assert(so);
6173 tl_assert(so->magic == SO_MAGIC);
6174 if (so->viR == VtsID_INVALID) {
6175 tl_assert(so->viW == VtsID_INVALID);
6176 } else {
6177 tl_assert(so->viW != VtsID_INVALID);
6178 VtsID__rcdec(so->viR);
6179 VtsID__rcdec(so->viW);
6180 }
6181 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006182 /* Del from double linked list */
6183 if (so->admin_prev)
6184 so->admin_prev->admin_next = so->admin_next;
6185 if (so->admin_next)
6186 so->admin_next->admin_prev = so->admin_prev;
6187 if (so == admin_SO)
6188 admin_SO = so->admin_next;
6189 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006190 HG_(free)( so );
6191}
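
/* Illustrative debug helper (hypothetical; not part of the original
   file).  The admin_SO list exists precisely so that all live SOs
   can be enumerated, e.g. to count them when chasing leaks: */
#if 0
static UWord count_admin_SOs ( void )
{
   UWord n = 0;
   SO*   so;
   for (so = admin_SO; so; so = so->admin_next) {
      tl_assert(so->magic == SO_MAGIC);
      n++;
   }
   return n;
}
#endif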
6192
6193
6194/////////////////////////////////////////////////////////
6195// //
6196// Top Level API //
6197// //
6198/////////////////////////////////////////////////////////
6199
florian6bd9dc12012-11-23 16:17:43 +00006200static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006201{
6202 if (1) return;
6203 if (t->viR == t->viW) {
6204 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6205 VtsID__pp( t->viR );
6206 VG_(printf)("%s","\n");
6207 } else {
6208 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6209 VtsID__pp( t->viR );
6210 VG_(printf)(" viW %u==", t->viW);
6211 VtsID__pp( t->viW );
6212 VG_(printf)("%s","\n");
6213 }
6214}
6215
6216
6217Thr* libhb_init (
6218 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006219 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006220 )
6221{
6222 Thr* thr;
6223 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006224
6225 // We will have to have to store a large number of these,
6226 // so make sure they're the size we expect them to be.
philippe328d6622015-05-25 17:24:27 +00006227 STATIC_ASSERT(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006228
6229 /* because first 1024 unusable */
philippe328d6622015-05-25 17:24:27 +00006230 STATIC_ASSERT(SCALARTS_N_THRBITS >= 11);
6231 /* so as to fit in a UInt w/ 5 bits to spare (see defn of
6232 Thr_n_RCEC and TSW). */
6233 STATIC_ASSERT(SCALARTS_N_THRBITS <= 27);
sewardjffce8152011-06-24 10:09:41 +00006234
6235 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6236 (32-bit). It's not correctness-critical, but there are a lot of
6237 them, so it's important from a space viewpoint. Unfortunately
6238 we simply can't pack it into 2 words on a 32-bit target. */
philippe328d6622015-05-25 17:24:27 +00006239 STATIC_ASSERT( (sizeof(UWord) == 8 && sizeof(Thr_n_RCEC) == 16)
6240 || (sizeof(UWord) == 4 && sizeof(Thr_n_RCEC) == 12));
6241 STATIC_ASSERT(sizeof(TSW) == sizeof(UInt));
sewardjffce8152011-06-24 10:09:41 +00006242
6243 /* Word sets really are 32 bits. Even on a 64 bit target. */
philippe328d6622015-05-25 17:24:27 +00006244 STATIC_ASSERT(sizeof(WordSetID) == 4);
6245 STATIC_ASSERT(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006246
sewardjf98e1c02008-10-25 16:22:41 +00006247 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006248 tl_assert(get_EC);
6249 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006250 main_get_EC = get_EC;
6251
6252 // No need to initialise hg_wordfm.
6253 // No need to initialise hg_wordset.
6254
sewardj7aa38a92011-02-27 23:04:12 +00006255 /* Allocated once and never deallocated. Used as a temporary in
6256 VTS singleton, tick and join operations. */
6257 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6258 temp_max_sized_VTS->id = VtsID_INVALID;
philippec3508652015-03-28 12:01:58 +00006259 verydead_thread_tables_init();
sewardjf98e1c02008-10-25 16:22:41 +00006260 vts_set_init();
6261 vts_tab_init();
6262 event_map_init();
6263 VtsID__invalidate_caches();
6264
6265 // initialise shadow memory
philippe1475a7f2015-05-11 19:45:08 +00006266 zsm_init( );
sewardjf98e1c02008-10-25 16:22:41 +00006267
6268 thr = Thr__new();
6269 vi = VtsID__mk_Singleton( thr, 1 );
6270 thr->viR = vi;
6271 thr->viW = vi;
6272 VtsID__rcinc(thr->viR);
6273 VtsID__rcinc(thr->viW);
6274
6275 show_thread_state(" root", thr);
6276 return thr;
6277}
6278
sewardj23f12002009-07-24 08:45:08 +00006279
sewardjf98e1c02008-10-25 16:22:41 +00006280Thr* libhb_create ( Thr* parent )
6281{
6282 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6283 the child's index. Since the child's index is guaranteed
6284 unique, it has never been seen before, so the implicit value
6285 before the tick is zero and after that is one. */
6286 Thr* child = Thr__new();
6287
6288 child->viR = VtsID__tick( parent->viR, child );
6289 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006290 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006291 VtsID__rcinc(child->viR);
6292 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006293 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006294 early for that - it may not have a valid TId yet. So, let
6295 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006296
6297 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6298 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6299
6300 /* and the parent has to move along too */
6301 VtsID__rcdec(parent->viR);
6302 VtsID__rcdec(parent->viW);
6303 parent->viR = VtsID__tick( parent->viR, parent );
6304 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006305 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006306 VtsID__rcinc(parent->viR);
6307 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006308 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006309
6310 show_thread_state(" child", child);
6311 show_thread_state("parent", parent);
6312
6313 return child;
6314}
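
/* Worked example (illustrative notation): if the parent runs with
   viR == viW == [P:5], the child starts with [P:5, C:1] -- the
   parent's clock ticked at the child's fresh index -- and the parent
   itself moves on to [P:6].  Hence everything the parent did before
   the create happens-before everything the child does, while the two
   threads are unordered from then on. */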
6315
6316/* Shut down the library, and print stats (in fact that's _all_
6317   this is for). */
6318void libhb_shutdown ( Bool show_stats )
6319{
6320 if (show_stats) {
6321 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6322 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6323 stats__secmaps_allocd,
6324 stats__secmap_ga_space_covered);
6325 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6326 stats__secmap_linesZ_allocd,
6327 stats__secmap_linesZ_bytes);
philippe0fb30ac2015-05-15 13:17:17 +00006328 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6329 " (%'10lu used)\n",
philippe71ed3c92015-05-17 19:32:42 +00006330 VG_(sizePA) (LineF_pool_allocator),
6331 VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
philippe0fb30ac2015-05-15 13:17:17 +00006332 shmem__SecMap_used_linesF());
philippef54cb662015-05-10 22:19:31 +00006333 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6334 " #%lu scanGC \n",
6335 stats__secmaps_in_map_shmem,
6336 shmem__SecMap_do_GC(False /* really do GC */),
6337 stats__secmaps_scanGC);
6338 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6339 VG_(printf)(" secmaps: %'10lu in freelist,"
6340 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6341 SecMap_freelist_length(),
6342 stats__secmaps_scanGCed,
6343 stats__secmaps_ssetGCed);
sewardjf98e1c02008-10-25 16:22:41 +00006344 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6345 stats__secmaps_search, stats__secmaps_search_slow);
6346
6347 VG_(printf)("%s","\n");
6348 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6349 stats__cache_totrefs, stats__cache_totmisses );
6350 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6351 stats__cache_Z_fetches, stats__cache_F_fetches );
6352 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6353 stats__cache_Z_wbacks, stats__cache_F_wbacks );
philippef54cb662015-05-10 22:19:31 +00006354 VG_(printf)(" cache: %'14lu flushes_invals\n",
6355 stats__cache_flushes_invals );
sewardjf98e1c02008-10-25 16:22:41 +00006356 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6357 stats__cache_make_New_arange,
6358 stats__cache_make_New_inZrep);
6359
6360 VG_(printf)("%s","\n");
6361 VG_(printf)(" cline: %'10lu normalises\n",
6362 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006363 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6364 stats__cline_cread64s,
6365 stats__cline_cread32s,
6366 stats__cline_cread16s,
6367 stats__cline_cread08s );
6368 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6369 stats__cline_cwrite64s,
6370 stats__cline_cwrite32s,
6371 stats__cline_cwrite16s,
6372 stats__cline_cwrite08s );
6373 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6374 stats__cline_swrite64s,
6375 stats__cline_swrite32s,
6376 stats__cline_swrite16s,
6377 stats__cline_swrite08s );
6378 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6379 stats__cline_sread08s, stats__cline_scopy08s );
philippef54cb662015-05-10 22:19:31 +00006380 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6381 " 2to1 %'12lu\n",
6382 stats__cline_64to32splits, stats__cline_32to16splits,
6383 stats__cline_16to8splits );
6384 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6385 " 2to1 %'12lu\n",
6386 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6387 stats__cline_16to8pulldown );
sewardjf98e1c02008-10-25 16:22:41 +00006388 if (0)
philippef54cb662015-05-10 22:19:31 +00006389 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6390 " covers %ld bytes of arange\n",
6391 (Word)sizeof(LineZ),
6392 (Word)N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00006393
6394 VG_(printf)("%s","\n");
6395
sewardjc8028ad2010-05-05 09:34:42 +00006396 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006397 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006398 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006399 stats__msmcwrite, stats__msmcwrite_change);
6400 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6401 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006402 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6403 stats__join2_queries, stats__join2_misses);
6404
6405 VG_(printf)("%s","\n");
philippef54cb662015-05-10 22:19:31 +00006406 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6407 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6408 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6409 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6410 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6411 " (%'lu allocd)\n",
sewardj7aa38a92011-02-27 23:04:12 +00006412 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006413 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6414 stats__vts__indexat_slow );
6415
6416 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006417 VG_(printf)(
6418 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6419 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6420 );
philippe2bd23262015-05-11 20:56:49 +00006421 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6422 stats__vts_tab_GC, stats__vts_pruning);
sewardjf98e1c02008-10-25 16:22:41 +00006423 VG_(printf)( " libhb: %lu entries in vts_set\n",
6424 VG_(sizeFM)( vts_set ) );
6425
6426 VG_(printf)("%s","\n");
philippe900c5352015-03-24 14:02:44 +00006427 {
6428 UInt live = 0;
6429 UInt llexit_done = 0;
6430 UInt joinedwith_done = 0;
6431 UInt llexit_and_joinedwith_done = 0;
6432
6433 Thread* hgthread = get_admin_threads();
6434 tl_assert(hgthread);
6435 while (hgthread) {
6436 Thr* hbthr = hgthread->hbthr;
6437 tl_assert(hbthr);
6438 if (hbthr->llexit_done && hbthr->joinedwith_done)
6439 llexit_and_joinedwith_done++;
6440 else if (hbthr->llexit_done)
6441 llexit_done++;
6442 else if (hbthr->joinedwith_done)
6443 joinedwith_done++;
6444 else
6445 live++;
6446 hgthread = hgthread->admin;
6447 }
florian5e5cb002015-08-03 21:21:42 +00006448 VG_(printf)(" libhb: threads live: %u exit_and_joinedwith %u"
6449 " exit %u joinedwith %u\n",
philippe900c5352015-03-24 14:02:44 +00006450 live, llexit_and_joinedwith_done,
6451 llexit_done, joinedwith_done);
philippec3508652015-03-28 12:01:58 +00006452 VG_(printf)(" libhb: %d verydead_threads, "
6453 "%d verydead_threads_not_pruned\n",
6454 (int) VG_(sizeXA)( verydead_thread_table),
6455 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6456 tl_assert (VG_(sizeXA)( verydead_thread_table)
6457 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6458 == llexit_and_joinedwith_done);
philippe900c5352015-03-24 14:02:44 +00006459 }
6460
6461 VG_(printf)("%s","\n");
philippe328d6622015-05-25 17:24:27 +00006462 VG_(printf)( " libhb: oldrefHTN %lu (%'d bytes)\n",
6463 oldrefHTN, (int)(oldrefHTN * sizeof(OldRef)));
6464 tl_assert (oldrefHTN == VG_(HT_count_nodes) (oldrefHT));
6465 VG_(printf)( " libhb: oldref lookup found=%lu notfound=%lu\n",
6466 stats__evm__lookup_found, stats__evm__lookup_notfound);
6467 if (VG_(clo_verbosity) > 1)
6468 VG_(HT_print_stats) (oldrefHT, cmp_oldref_tsw);
6469 VG_(printf)( " libhb: oldref bind tsw/rcec "
philippe3a085bf2015-05-26 21:27:36 +00006470 "==/==:%'lu ==/!=:%'lu !=/!=:%'lu\n",
philippe328d6622015-05-25 17:24:27 +00006471 stats__ctxt_eq_tsw_eq_rcec, stats__ctxt_eq_tsw_neq_rcec,
6472 stats__ctxt_neq_tsw_neq_rcec);
philippe3a085bf2015-05-26 21:27:36 +00006473 VG_(printf)( " libhb: ctxt__rcdec calls %'lu. rcec gc discards %'lu\n",
philippe328d6622015-05-25 17:24:27 +00006474 stats__ctxt_rcdec_calls, stats__ctxt_rcec_gc_discards);
philippecabdbb52015-04-20 21:33:16 +00006475 VG_(printf)( " libhb: contextTab: %lu slots,"
6476 " %lu cur ents(ref'd %lu),"
philippe06bc23a2015-04-17 21:19:43 +00006477 " %lu max ents\n",
sewardjf98e1c02008-10-25 16:22:41 +00006478 (UWord)N_RCEC_TAB,
philippecabdbb52015-04-20 21:33:16 +00006479 stats__ctxt_tab_curr, RCEC_referenced,
6480 stats__ctxt_tab_max );
philippe47124e92015-04-25 14:00:24 +00006481 {
6482# define MAXCHAIN 10
6483 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6484 UInt non0chain = 0;
6485 UInt n;
6486 UInt i;
6487 RCEC *p;
6488
6489 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6490 for (i = 0; i < N_RCEC_TAB; i++) {
6491 n = 0;
6492 for (p = contextTab[i]; p; p = p->next)
6493 n++;
6494 if (n < MAXCHAIN)
6495 chains[n]++;
6496 else
6497 chains[MAXCHAIN]++;
6498 if (n > 0)
6499 non0chain++;
6500 }
6501 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6502 " Avg chain len %3.1f\n"
6503 " ",
6504 (Double)stats__ctxt_tab_curr
6505 / (Double)(non0chain ? non0chain : 1));
6506 for (i = 0; i <= MAXCHAIN; i++) {
6507 if (chains[i] != 0)
florian5e5cb002015-08-03 21:21:42 +00006508 VG_(printf)( "[%u%s]=%u ",
philippe47124e92015-04-25 14:00:24 +00006509 i, i == MAXCHAIN ? "+" : "",
6510 chains[i]);
6511 }
6512 VG_(printf)( "\n");
6513# undef MAXCHAIN
6514 }
sewardjf98e1c02008-10-25 16:22:41 +00006515 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6516 stats__ctxt_tab_qs,
6517 stats__ctxt_tab_cmps );
6518#if 0
6519 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6520 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6521 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6522 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6523 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6524 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6525 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6526 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6527 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6528 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6529 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6530 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6531 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6532 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6533
6534 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6535 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6536 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6537 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6538#endif
6539
6540 VG_(printf)("%s","<<< END libhb stats >>>\n");
6541 VG_(printf)("%s","\n");
6542
6543 }
6544}
6545
sewardjffce8152011-06-24 10:09:41 +00006546/* Receive notification that a thread has low-level exited.  The
6547 significance here is that we do not expect to see any more memory
6548 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006549void libhb_async_exit ( Thr* thr )
6550{
sewardj23f12002009-07-24 08:45:08 +00006551 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006552 tl_assert(!thr->llexit_done);
6553 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006554
6555 /* free up Filter and local_Kws_n_stacks (well, actually not the
6556 latter ..) */
6557 tl_assert(thr->filter);
6558 HG_(free)(thr->filter);
6559 thr->filter = NULL;
6560
sewardjffce8152011-06-24 10:09:41 +00006561 /* Tell the VTS mechanism this thread has exited, so it can
6562 participate in VTS pruning. Note this can only happen if the
6563 thread has both ll_exited and has been joined with. */
6564 if (thr->joinedwith_done)
6565 VTS__declare_thread_very_dead(thr);
6566
sewardj2d2ea2f2009-08-02 10:15:07 +00006567 /* Another space-accuracy tradeoff. Do we want to be able to show
6568 H1 history for conflicts in threads which have since exited? If
6569 yes, then we better not free up thr->local_Kws_n_stacks. The
6570   yes, then we'd better not free up thr->local_Kws_n_stacks. The
6571 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6572 XArray average overcommit factor is (1.5 I'd guess). */
6573 // hence:
6574 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6575 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006576}
6577
sewardjffce8152011-06-24 10:09:41 +00006578/* Receive notification that a thread has been joined with. The
6579 significance here is that we do not expect to see any further
6580 references to its vector clocks (Thr::viR and Thr::viW). */
6581void libhb_joinedwith_done ( Thr* thr )
6582{
6583 tl_assert(thr);
6584 /* Caller must ensure that this is only ever called once per Thr. */
6585 tl_assert(!thr->joinedwith_done);
6586 thr->joinedwith_done = True;
6587 if (thr->llexit_done)
6588 VTS__declare_thread_very_dead(thr);
6589}
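
/* Lifecycle note (illustrative): libhb_async_exit and
   libhb_joinedwith_done can arrive in either order.  Only once a
   thread has *both* low-level exited and been joined with does
   VTS__declare_thread_very_dead run, after which the thread's slot
   can take part in VTS pruning. */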
6590
6591
sewardjf98e1c02008-10-25 16:22:41 +00006592/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6593 a Seg that points at a VTS is its one-and-only owner, and ditto for
6594 a SO that points at a VTS. */
6595
6596SO* libhb_so_alloc ( void )
6597{
6598 return SO__Alloc();
6599}
6600
6601void libhb_so_dealloc ( SO* so )
6602{
6603 tl_assert(so);
6604 tl_assert(so->magic == SO_MAGIC);
6605 SO__Dealloc(so);
6606}
6607
6608/* See comments in libhb.h for details on the meaning of
6609 strong vs weak sends and strong vs weak receives. */
6610void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6611{
6612 /* Copy the VTSs from 'thr' into the sync object, and then move
6613 the thread along one step. */
6614
6615 tl_assert(so);
6616 tl_assert(so->magic == SO_MAGIC);
6617
6618 /* stay sane .. a thread's read-clock must always lead or be the
6619 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006620 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6621 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006622 }
6623
6624 /* since we're overwriting the VtsIDs in the SO, we need to drop
6625 any references made by the previous contents thereof */
6626 if (so->viR == VtsID_INVALID) {
6627 tl_assert(so->viW == VtsID_INVALID);
6628 so->viR = thr->viR;
6629 so->viW = thr->viW;
6630 VtsID__rcinc(so->viR);
6631 VtsID__rcinc(so->viW);
6632 } else {
6633 /* In a strong send, we dump any previous VC in the SO and
6634 install the sending thread's VC instead. For a weak send we
6635 must join2 with what's already there. */
6636 tl_assert(so->viW != VtsID_INVALID);
6637 VtsID__rcdec(so->viR);
6638 VtsID__rcdec(so->viW);
6639 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6640 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6641 VtsID__rcinc(so->viR);
6642 VtsID__rcinc(so->viW);
6643 }
6644
6645 /* move both parent clocks along */
6646 VtsID__rcdec(thr->viR);
6647 VtsID__rcdec(thr->viW);
6648 thr->viR = VtsID__tick( thr->viR, thr );
6649 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006650 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006651 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006652 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006653 }
sewardjf98e1c02008-10-25 16:22:41 +00006654 VtsID__rcinc(thr->viR);
6655 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006656
sewardjf98e1c02008-10-25 16:22:41 +00006657 if (strong_send)
6658 show_thread_state("s-send", thr);
6659 else
6660 show_thread_state("w-send", thr);
6661}
6662
6663void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6664{
6665 tl_assert(so);
6666 tl_assert(so->magic == SO_MAGIC);
6667
6668 if (so->viR != VtsID_INVALID) {
6669 tl_assert(so->viW != VtsID_INVALID);
6670
6671 /* Weak receive (basically, an R-acquisition of a R-W lock).
6672 This advances the read-clock of the receiver, but not the
6673 write-clock. */
6674 VtsID__rcdec(thr->viR);
6675 thr->viR = VtsID__join2( thr->viR, so->viR );
6676 VtsID__rcinc(thr->viR);
6677
sewardj90eb22e2009-07-28 20:22:18 +00006678 /* At one point (r10589) it seemed safest to tick the clocks for
6679 the receiving thread after the join. But on reflection, I
6680 wonder if that might cause it to 'overtake' constraints,
6681 which could lead to missing races. So, back out that part of
6682 r10589. */
6683 //VtsID__rcdec(thr->viR);
6684 //thr->viR = VtsID__tick( thr->viR, thr );
6685 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006686
sewardjf98e1c02008-10-25 16:22:41 +00006687 /* For a strong receive, we also advance the receiver's write
6688 clock, which means the receive as a whole is essentially
6689 equivalent to a W-acquisition of a R-W lock. */
6690 if (strong_recv) {
6691 VtsID__rcdec(thr->viW);
6692 thr->viW = VtsID__join2( thr->viW, so->viW );
6693 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006694
sewardj90eb22e2009-07-28 20:22:18 +00006695 /* See comment just above, re r10589. */
6696 //VtsID__rcdec(thr->viW);
6697 //thr->viW = VtsID__tick( thr->viW, thr );
6698 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006699 }
6700
sewardjf4845dc2010-05-28 20:09:59 +00006701 if (thr->filter)
6702 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006703 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006704
sewardjf98e1c02008-10-25 16:22:41 +00006705 if (strong_recv)
6706 show_thread_state("s-recv", thr);
6707 else
6708 show_thread_state("w-recv", thr);
6709
6710 } else {
6711 tl_assert(so->viW == VtsID_INVALID);
6712 /* Deal with degenerate case: 'so' has no vts, so there has been
6713 no message posted to it. Just ignore this case. */
6714 show_thread_state("d-recv", thr);
6715 }
6716}
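
/* Illustrative use (a sketch; the actual call sites are in the
   Helgrind core, and thr1/thr2/so here are hypothetical): modelling
   a mutex hand-over as a happens-before edge.  T1's unlock does a
   strong send on the mutex's SO, dumping T1's clocks into it; T2's
   later lock does a strong recv, joining those clocks into both of
   T2's clocks.  Everything T1 did before the unlock therefore
   happens-before everything T2 does after the lock. */
#if 0
   /* T1, at unlock time */ libhb_so_send( thr1, so, True/*strong_send*/ );
   /* T2, at lock time */   libhb_so_recv( thr2, so, True/*strong_recv*/ );
#endif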
6717
6718Bool libhb_so_everSent ( SO* so )
6719{
6720 if (so->viR == VtsID_INVALID) {
6721 tl_assert(so->viW == VtsID_INVALID);
6722 return False;
6723 } else {
6724 tl_assert(so->viW != VtsID_INVALID);
6725 return True;
6726 }
6727}
6728
6729#define XXX1 0 // 0x67a106c
6730#define XXX2 0
6731
sewardj23f12002009-07-24 08:45:08 +00006732static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006733 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6734 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6735 return False;
6736}
florian0c8a47c2013-10-01 20:10:21 +00006737static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006738{
sewardj23f12002009-07-24 08:45:08 +00006739 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006740 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6741 show_thread_state("", thr);
6742 VG_(printf)("%s","\n");
6743}
6744
sewardj23f12002009-07-24 08:45:08 +00006745void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006746{
6747 SVal sv = SVal__mkC(thr->viW, thr->viW);
6748 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006749 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6750 zsm_sset_range( a, szB, sv );
6751 Filter__clear_range( thr->filter, a, szB );
6752 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006753}
6754
sewardjfd35d492011-03-17 19:39:55 +00006755void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006756{
sewardj23f12002009-07-24 08:45:08 +00006757 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006758}
6759
philippef54cb662015-05-10 22:19:31 +00006760
6761/* Set the lines zix_start to zix_end (inclusive) to NOACCESS. */
6762static void zsm_secmap_line_range_noaccess (SecMap *sm,
6763 UInt zix_start, UInt zix_end)
6764{
6765 for (UInt lz = zix_start; lz <= zix_end; lz++) {
6766 LineZ* lineZ;
philippef54cb662015-05-10 22:19:31 +00006767 lineZ = &sm->linesZ[lz];
6768 if (lineZ->dict[0] != SVal_INVALID) {
6769 rcdec_LineZ(lineZ);
philippe71ed3c92015-05-17 19:32:42 +00006770 lineZ->dict[0] = SVal_NOACCESS;
6771 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
philippef54cb662015-05-10 22:19:31 +00006772 } else {
philippe71ed3c92015-05-17 19:32:42 +00006773 clear_LineF_of_Z(lineZ);
philippef54cb662015-05-10 22:19:31 +00006774 }
philippef54cb662015-05-10 22:19:31 +00006775 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6776 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6777 }
6778}
6779
6780/* Set the given range to SVal_NOACCESS in-place in the secmap.
6781 a must be cacheline aligned. len must be a multiple of a cacheline
6782 and must be < N_SECMAP_ARANGE. */
6783static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6784{
6785 tl_assert (is_valid_scache_tag (a));
6786 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6787 tl_assert (len < N_SECMAP_ARANGE);
6788
6789 SecMap *sm1 = shmem__find_SecMap (a);
6790 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6791 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
6792 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6793
6794 if (sm1) {
6795 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6796 zsm_secmap_line_range_noaccess (sm1, zix_start,
6797 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6798 }
6799 if (sm2 && sm1 != sm2) {
6800 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6801 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6802 }
6803}
6804
6805/* Set the given address range to SVal_NOACCESS.
6806   The SecMaps fully set to SVal_NOACCESS will be pushed onto SecMap_freelist. */
6807static void zsm_sset_range_noaccess (Addr addr, SizeT len)
6808{
6809 /*
6810 BPC = Before, Partial Cacheline, = addr
6811 (i.e. starting inside a cacheline/inside a SecMap)
6812 BFC = Before, Full Cacheline(s), but not full SecMap
6813 (i.e. starting inside a SecMap)
6814 FSM = Full SecMap(s)
6815 (i.e. starting a SecMap)
6816 AFC = After, Full Cacheline(s), but not full SecMap
6817 (i.e. first address after the full SecMap(s))
6818 APC = After, Partial Cacheline, i.e. first address after the
6819 full CacheLines).
6820 ARE = After Range End = addr+len = first address not part of the range.
6821
6822 If addr starts a Cacheline, then BPC == BFC.
6823 If addr starts a SecMap, then BPC == BFC == FSM.
6824 If addr+len starts a SecMap, then APC == ARE == AFC
6825 If addr+len starts a Cacheline, then APC == ARE
6826 */
6827 Addr ARE = addr + len;
6828 Addr BPC = addr;
6829 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
6830 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
6831 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
6832 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
6833 SizeT Plen = len; // Plen will be split between the following:
6834 SizeT BPClen;
6835 SizeT BFClen;
6836 SizeT FSMlen;
6837 SizeT AFClen;
6838 SizeT APClen;
6839
6840 /* Consumes from Plen the nr of bytes between from and to.
6841 from and to must be aligned on a multiple of round.
6842 The length consumed will be a multiple of round, with
6843 a maximum of Plen. */
6844# define PlenCONSUME(from, to, round, consumed) \
6845 do { \
6846 if (from < to) { \
6847 if (to - from < Plen) \
6848 consumed = to - from; \
6849 else \
6850 consumed = ROUNDDN(Plen, round); \
6851 } else { \
6852 consumed = 0; \
6853 } \
6854 Plen -= consumed; } while (0)
6855
6856 PlenCONSUME(BPC, BFC, 1, BPClen);
6857 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
6858 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
6859 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
6860 PlenCONSUME(APC, ARE, 1, APClen);
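
   /* Worked example (illustrative, assuming N_LINE_ARANGE == 0x40 and
      N_SECMAP_ARANGE == 0x2000): for addr == 0x21F0, len == 0x6430 we
      get ARE == 0x8620, BFC == 0x2200, FSM == 0x4000, AFC == 0x8000,
      APC == 0x8600, and hence BPClen == 0x10, BFClen == 0x1E00,
      FSMlen == 0x4000, AFClen == 0x600, APClen == 0x20, which sum to
      len, as the assertion below demands. */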
6861
6862 if (0)
florian5e5cb002015-08-03 21:21:42 +00006863 VG_(printf) ("addr %p[%lu] ARE %p"
6864 " BPC %p[%lu] BFC %p[%lu] FSM %p[%lu]"
6865 " AFC %p[%lu] APC %p[%lu]\n",
philippef54cb662015-05-10 22:19:31 +00006866 (void*)addr, len, (void*)ARE,
6867 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
6868 (void*)AFC, AFClen, (void*)APC, APClen);
6869
6870 tl_assert (Plen == 0);
6871
6872   /* Set to NOACCESS the before/after pieces not covered by entire SecMaps. */
6873
6874 /* First we set the partial cachelines. This is done through the cache. */
6875 if (BPClen > 0)
6876 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
6877 if (APClen > 0)
6878 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
6879
6880 /* After this, we will not use the cache anymore. We will directly work
6881 in-place on the z shadow memory in SecMap(s).
6882 So, we invalidate the cachelines for the whole range we are setting
6883 to NOACCESS below. */
6884 shmem__invalidate_scache_range (BFC, APC - BFC);
6885
6886 if (BFClen > 0)
6887 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
6888 if (AFClen > 0)
6889 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
6890
6891 if (FSMlen > 0) {
6892 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
6893 free list. */
6894 Addr sm_start = FSM;
6895 while (sm_start < AFC) {
6896 SecMap *sm = shmem__find_SecMap (sm_start);
6897 if (sm) {
6898 Addr gaKey;
6899 SecMap *fm_sm;
6900
6901 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
6902 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
philippe71ed3c92015-05-17 19:32:42 +00006903 LineZ *lineZ = &sm->linesZ[lz];
6904 if (LIKELY(lineZ->dict[0] != SVal_INVALID))
6905 rcdec_LineZ(lineZ);
6906 else
6907 clear_LineF_of_Z(lineZ);
philippef54cb662015-05-10 22:19:31 +00006908 }
6909 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
6910 tl_assert (0);
6911 stats__secmaps_in_map_shmem--;
6912 tl_assert (gaKey == sm_start);
6913 tl_assert (sm == fm_sm);
6914 stats__secmaps_ssetGCed++;
6915 push_SecMap_on_freelist (sm);
6916 }
6917 sm_start += N_SECMAP_ARANGE;
6918 }
6919 tl_assert (sm_start == AFC);
6920
6921      /* The above loop might have kept copies of freed SecMaps in the
6922         smCache, so clear them. */
6923 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
6924 smCache[0].gaKey = 1;
6925 smCache[0].sm = NULL;
6926 }
6927 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
6928 smCache[1].gaKey = 1;
6929 smCache[1].sm = NULL;
6930 }
6931 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
6932 smCache[2].gaKey = 1;
6933 smCache[2].sm = NULL;
6934 }
6935 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
6936 }
6937}
6938
sewardjfd35d492011-03-17 19:39:55 +00006939void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6940{
6941 /* This really does put the requested range in NoAccess. It's
6942 expensive though. */
6943 SVal sv = SVal_NOACCESS;
6944 tl_assert(is_sane_SVal_C(sv));
philippef54cb662015-05-10 22:19:31 +00006945 if (LIKELY(szB < 2 * N_LINE_ARANGE))
6946 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
6947 else
6948 zsm_sset_range_noaccess (a, szB);
sewardjfd35d492011-03-17 19:39:55 +00006949 Filter__clear_range( thr->filter, a, szB );
6950}
6951
philippef54cb662015-05-10 22:19:31 +00006952/* Works byte at a time. Can be optimised if needed. */
6953UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
6954{
6955 UWord anr = 0; // nr of bytes addressable.
6956
6957   /* Get the accessibility of each byte.  Take care not to create a
6958      SecMap or LineZ when checking whether a byte is addressable.
6959
6960      Note: this is used for client requests.  Performance is deemed
6961      not critical, so for simplicity we work byte by byte.
6962      It could be improved by working with full cachelines or full
6963      SecMaps on reaching a cacheline or SecMap boundary. */
6964 for (SizeT i = 0; i < len; i++) {
6965 SVal sv = SVal_INVALID;
6966 Addr b = a + i;
6967 Addr tag = b & ~(N_LINE_ARANGE - 1);
6968 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
6969 UWord cloff = get_cacheline_offset(b);
6970
6971 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
6972 and/or SecMap for non addressable bytes. */
6973 if (tag == cache_shmem.tags0[wix]) {
6974 CacheLine copy = cache_shmem.lyns0[wix];
6975 /* We work on a copy of the cacheline, as we do not want to
6976 record the client request as a real read.
6977 The below is somewhat similar to zsm_sapply08__msmcread but
6978 avoids side effects on the cache. */
6979 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
6980 UWord tno = get_treeno(b);
6981 UShort descr = copy.descrs[tno];
6982 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
6983 SVal* tree = &copy.svals[tno << 3];
6984 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
6985 }
6986 sv = copy.svals[cloff];
6987 } else {
6988 /* Byte not found in the cacheline. Search for a SecMap. */
6989 SecMap *sm = shmem__find_SecMap(b);
6990 LineZ *lineZ;
6991 if (sm == NULL)
6992 sv = SVal_NOACCESS;
6993 else {
6994 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
6995 lineZ = &sm->linesZ[zix];
6996 if (lineZ->dict[0] == SVal_INVALID) {
philippe71ed3c92015-05-17 19:32:42 +00006997 LineF *lineF = SVal2Ptr(lineZ->dict[1]);
6998 sv = lineF->w64s[cloff];
philippef54cb662015-05-10 22:19:31 +00006999 } else {
7000 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7001 sv = lineZ->dict[ix];
7002 }
7003 }
7004 }
7005
7006 tl_assert (sv != SVal_INVALID);
7007 if (sv == SVal_NOACCESS) {
7008 if (abits)
7009 abits[i] = 0x00;
7010 } else {
7011 if (abits)
7012 abits[i] = 0xff;
7013 anr++;
7014 }
7015 }
7016
7017 return anr;
7018}
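
/* A minimal usage sketch (illustrative; someAddr and the buffer size
   are made up): query the addressability of a range without
   perturbing the shadow cache. */
#if 0
   UChar abits[64];
   UWord n_addressable = libhb_srange_get_abits( someAddr, abits, 64 );
   /* abits[i] is 0xff where byte someAddr+i is addressable and 0x00
      where it is SVal_NOACCESS; n_addressable counts the 0xff's. */
#endif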
7019
7020
sewardj406bac82010-03-03 23:03:40 +00007021void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7022{
7023 SVal sv = SVal_NOACCESS;
7024 tl_assert(is_sane_SVal_C(sv));
7025 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
philippef54cb662015-05-10 22:19:31 +00007026 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7027 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7028 else
7029 zsm_sset_range_noaccess (a, szB);
sewardj406bac82010-03-03 23:03:40 +00007030 Filter__clear_range( thr->filter, a, szB );
7031 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7032}
7033
sewardj0b20a152011-03-10 21:34:21 +00007034Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00007035 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00007036 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007037}
7038
sewardj0b20a152011-03-10 21:34:21 +00007039void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00007040 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00007041 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007042}
7043
sewardj23f12002009-07-24 08:45:08 +00007044void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00007045{
sewardj23f12002009-07-24 08:45:08 +00007046 zsm_scopy_range(src, dst, len);
7047 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00007048}
7049
7050void libhb_maybe_GC ( void )
7051{
philippecabdbb52015-04-20 21:33:16 +00007052   /* GC the unreferenced (zero rc) RCECs when
philippee0829e02015-04-21 20:55:40 +00007053      (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7054      with mostly NULL ptrs)
7055      and (2) approaching the max nr of RCECs (as we have in any case
7056      at least that many RCECs in the pool allocator).
7057      Note: the margin avoids a small but constant increase
7058      of the max nr of RCECs, due to the fact that libhb_maybe_GC is
7059      not called when the current nr of RCECs exactly reaches the max.
7060      and (3) the nr of referenced RCECs is less than 75% of the total nr of RCECs.
7061      Avoiding excessive growth in the nr of RCECs keeps memory use low
7062      and avoids having too many elements in the (fixed-size) contextTab
7063      hash table. */
philippecabdbb52015-04-20 21:33:16 +00007064 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
philippee0829e02015-04-21 20:55:40 +00007065 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
philippef54cb662015-05-10 22:19:31 +00007066 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
philippecabdbb52015-04-20 21:33:16 +00007067 do_RCEC_GC();
philippe158404e2015-04-10 19:34:14 +00007068
philippef54cb662015-05-10 22:19:31 +00007069 /* If there are still no entries available (all the table entries are full),
florianad4e9792015-07-05 21:53:33 +00007070 and we hit the threshold point, then do a GC */
philippef54cb662015-05-10 22:19:31 +00007071 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7072 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7073 if (UNLIKELY (vts_tab_GC))
7074 vts_tab__do_GC( False/*don't show stats*/ );
7075
7076 /* scan GC the SecMaps when
7077 (1) no SecMap in the freelist
7078 and (2) the current nr of live secmaps exceeds the threshold. */
7079 if (UNLIKELY(SecMap_freelist == NULL
7080 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7081 // If we did a vts tab GC, then no need to flush the cache again.
7082 if (!vts_tab_GC)
7083 zsm_flush_cache();
7084 shmem__SecMap_do_GC(True);
7085 }
philippecabdbb52015-04-20 21:33:16 +00007086
7087 /* Check the reference counts (expensive) */
7088 if (CHECK_CEM)
7089 event_map__check_reference_counts();
sewardjf98e1c02008-10-25 16:22:41 +00007090}
7091
7092
7093/////////////////////////////////////////////////////////////////
7094/////////////////////////////////////////////////////////////////
7095// //
7096// SECTION END main library //
7097// //
7098/////////////////////////////////////////////////////////////////
7099/////////////////////////////////////////////////////////////////
7100
7101/*--------------------------------------------------------------------*/
7102/*--- end libhb_main.c ---*/
7103/*--------------------------------------------------------------------*/