
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
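
/* Note (derivable from the two definitions above): both distinguished
   values live entirely in the top two bits of the 64-bit SVal (tag 3
   and tag 2 at bits 63:62 respectively), which is what keeps them
   disjoint from each other and from ordinary shadow values. */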



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
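
/* Packing sketch (illustrative, for the baseline 18/46 split):
   conceptually a ScalarTS is (tym << SCALARTS_N_THRBITS) | thrid,
   although the bitfields above leave the exact layout to the
   compiler.  With SCALARTS_N_THRBITS == 18, ThrID_MAX_VALID ==
   262143, and per NB2/NB5 the valid thrids are [1024 .. 262143]. */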



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
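
/* Worked example (illustrative): with FI_LINE_SZB_LOG2 == 5, the
   address 0x100f gives FI_GET_TAG(0x100f) == 0x1000 and
   FI_GET_LINENO(0x100f) == (0x100f >> 5) & 0x3ff == 0x80.  Within the
   32-byte line, 0x100f is byte 15, so it is covered by u16s[1] (each
   UShort covers 8 bytes), in the bit pair at bits 15 (R) and 14 (W). */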

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );
/* SVals in LineZ are used to store various pointers. */
static inline void* SVal2Ptr ( SVal s );
static inline SVal  Ptr2SVal ( void* ptr );

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals stored
   herein.  It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered to
   be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range       ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range      ( Addr, Addr, SizeT );
static void zsm_flush_cache      ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
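
/* E.g. ROUNDUP(13, 8) == 16 and ROUNDDN(13, 8) == 8; exact multiples
   are left unchanged (ROUNDUP(16, 8) == 16). */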

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}
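
/* E.g. with start == 0x1000 and szB == 0x100: a == 0xFFF gives
   a - start == (Addr)-1, which is >= szB, so False; a == 0x10FF gives
   0xFF < 0x100, so True.  The single unsigned compare relies on
   wraparound of the subtraction. */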

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0   (1<<0)
#define TREE_DESCR_32_0   (1<<1)
#define TREE_DESCR_16_1   (1<<2)
#define TREE_DESCR_64     (1<<3)
#define TREE_DESCR_16_2   (1<<4)
#define TREE_DESCR_32_1   (1<<5)
#define TREE_DESCR_16_3   (1<<6)
#define TREE_DESCR_8_0    (1<<7)
#define TREE_DESCR_8_1    (1<<8)
#define TREE_DESCR_8_2    (1<<9)
#define TREE_DESCR_8_3    (1<<10)
#define TREE_DESCR_8_4    (1<<11)
#define TREE_DESCR_8_5    (1<<12)
#define TREE_DESCR_8_6    (1<<13)
#define TREE_DESCR_8_7    (1<<14)
#define TREE_DESCR_DTY    (1<<15)
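
/* Illustration: a tree describing a single 64-bit value is just
   TREE_DESCR_64; the same range split into two 32-bit values is
   TREE_DESCR_32_1 | TREE_DESCR_32_0; a fully split tree of eight
   8-bit leaves is TREE_DESCR_8_7 | ... | TREE_DESCR_8_0. */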

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

/* LineZ.dict[1] is used to store various pointers:
   * In the first lineZ of a free SecMap, it points to the next free SecMap.
   * In a lineZ for which we need to use a lineF, it points to the lineF. */


typedef
   struct {
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* We use a pool allocator for LineF, as LineF is relatively small,
   and we will often alloc/release such lines. */
static PoolAlloc* LineF_pool_allocator;

/* SVals in a lineZ are used to store various pointers.
   Below are conversion functions to support that. */
static inline LineF *LineF_Ptr (LineZ *lineZ)
{
   tl_assert(lineZ->dict[0] == SVal_INVALID);
   return SVal2Ptr (lineZ->dict[1]);
}

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) a pointer to a LineF
   that holds the full representation.

   As each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to a LineF should equal
   the number of LineFs in use.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use

   Hence the following actions at the following transitions are required:

   F rep: alloc'd -> freed                    -- rcdec_LineF
   F rep:         -> alloc'd                  -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
*/

typedef
   struct {
      UInt  magic;
      LineZ linesZ[N_SECMAP_ZLINES];
   }
   SecMap;

#define SecMap_MAGIC      0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
static UWord stats__vts_pruning          = 0; // # nr of vts pruning

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}
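
/* Sizing note (derived from the constants above; approximate): each
   bigchunk is 4096 * 256 * 4 == 4MB, and on a 64-bit target a SecMap
   is roughly 6KB (128 LineZs of 48 bytes each, plus the magic word),
   so one bigchunk satisfies several hundred SecMap allocations before
   a fresh chunk has to be obtained from the address space manager. */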

/* SecMaps that have become entirely SVal_NOACCESS are inserted into a
   list of recycled SecMaps.  When a new SecMap is needed, a recycled
   SecMap will be used in preference to allocating a new one. */
/* We make a linked list of SecMaps.  The first LineZ is re-used to
   implement the linked list. */
/* Returns the SecMap following sm in the free list.
   NULL if sm is the last SecMap.  sm must be on the free list. */
static inline SecMap *SecMap_freelist_next ( SecMap* sm )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   return SVal2Ptr (sm->linesZ[0].dict[1]);
}
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   sm->linesZ[0].dict[1] = Ptr2SVal (next);
}

static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord  n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = SecMap_freelist_next (sm);
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, SecMap_freelist);
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = SecMap_freelist_next (sm);
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
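
/* The gaKey fields are initialised to 1, which can never equal a real
   SecMap base address (those are N_SECMAP_ARANGE-aligned), so the
   cache starts out empty.  Hits on slots 1 and 2 swap the entry one
   place towards slot 0, giving a cheap move-to-front approximation. */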

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many could be GC-ed.
   If 'really' is True, actually do the GC of those SecMaps. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed  = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool    ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr    fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
            We just need to free the lineF referenced by the linesZ. */
         if (n_linesF > 0) {
            for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
               LineZ* lineZ = &sm->linesZ[i];
               if (lineZ->dict[0] == SVal_INVALID) {
                  VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                  n_linesF--;
               }
            }
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  We then allow to alloc 10%
         more before GCing.  This avoids doing a lot of costly GC
         for the worst case : the 'growing phase' of an application
         that allocates a lot of memory.
         The worst case can be reproduced e.g. by
             perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}

/* Returns the nr of linesF which are in use.  Note: this is scanning
   the secmap wordFM.  So, this is to be used for statistics only. */
__attribute__((noinline))
static UWord shmem__SecMap_used_linesF(void)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord inUse = 0;

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);

      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] == SVal_INVALID)
            inUse++;
      }
   }
   VG_(doneIterFM)( map_shmem );

   return inUse;
}

/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
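
/* Example (illustrative): write_twobit_array(arr, 5, 3) updates bits
   3:2 of arr[1], since index 5 lands in byte 5 >> 2 == 1 at shift
   2 * (5 & 3) == 2; a subsequent read_twobit_array(arr, 5) returns 3.
   This is how a LineZ's ix2s[] maps each of the 64 line bytes to one
   of the four dict[] entries using only two bits per byte. */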

/* Allocates a lineF for LineZ.  Sets lineZ in a state indicating
   lineF has to be used. */
static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
{
   LineF *lineF;

   tl_assert(lineZ->dict[0] == SVal_INVALID);

   lineF = VG_(allocEltPA) ( LineF_pool_allocator );
   lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   lineZ->dict[1] = Ptr2SVal (lineF);

   return lineF;
}

/* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
   back to its initial state SVal_NOACCESS (i.e. ready to be
   read or written just after SecMap allocation). */
static inline void clear_LineF_of_Z (LineZ *lineZ)
{
   LineF *lineF = LineF_Ptr(lineZ);

   rcdec_LineF(lineF);
   VG_(freeEltPA)( LineF_pool_allocator, lineF );
   lineZ->dict[0] = SVal_NOACCESS;
   lineZ->dict[1] = SVal_INVALID;
}

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ*  lineZ;
   LineF*  lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      lineF = LineF_Ptr (lineZ);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, do an rcdec on the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ*  lineZ;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   /* re RCs: we are about to rcdec_LineZ or clear_LineF_of_Z this
      LineZ so that new data can be parked in it.  Hence it has to be
      rcdec'd accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID)
      clear_LineF_of_Z(lineZ);
   else
      rcdec_LineZ(lineZ);
   *smp  = sm;
   *zixp = zix;
}

/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
      /* INVALID - any valid descr produces at least one
         valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
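
/* Worked example (illustrative): if tree[0..7] all hold the same SVal
   on entry, the merges cascade bottom-up and the function returns
   TREE_DESCR_64, leaving only tree[0] valid.  If tree[0..3] hold one
   value and tree[4..7] hold four distinct values, the result is
   TREE_DESCR_32_0 | TREE_DESCR_8_4 | TREE_DESCR_8_5
   | TREE_DESCR_8_6 | TREE_DESCR_8_7. */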
1367
1368/* This takes a cacheline where all the data is at the leaves
1369 (w8[..]) and builds a correctly normalised tree. */
1370static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1371{
1372 Word tno, cloff;
1373 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1374 SVal* tree = &cl->svals[cloff];
1375 cl->descrs[tno] = normalise_tree( tree );
1376 }
1377 tl_assert(cloff == N_LINE_ARANGE);
sewardj8f5374e2008-12-07 11:40:17 +00001378 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001379 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1380 stats__cline_normalises++;
1381}
1382
1383
1384typedef struct { UChar count; SVal sval; } CountedSVal;
1385
1386static
1387void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1388 /*OUT*/Word* dstUsedP,
1389 Word nDst, CacheLine* src )
1390{
1391 Word tno, cloff, dstUsed;
1392
1393 tl_assert(nDst == N_LINE_ARANGE);
1394 dstUsed = 0;
1395
1396 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1397 UShort descr = src->descrs[tno];
1398 SVal* tree = &src->svals[cloff];
1399
1400 /* sequentialise the tree described by (descr,tree). */
1401# define PUT(_n,_v) \
1402 do { dst[dstUsed ].count = (_n); \
1403 dst[dstUsed++].sval = (_v); \
1404 } while (0)
1405
1406 /* byte 0 */
1407 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1408 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1409 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1410 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1411 /* byte 1 */
1412 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1413 /* byte 2 */
1414 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1415 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1416 /* byte 3 */
1417 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1418 /* byte 4 */
1419 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1420 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1421 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1422 /* byte 5 */
1423 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1424 /* byte 6 */
1425 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1426 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1427 /* byte 7 */
1428 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1429
1430# undef PUT
1431 /* END sequentialise the tree described by (descr,tree). */
1432
1433 }
1434 tl_assert(cloff == N_LINE_ARANGE);
1435 tl_assert(dstUsed <= nDst);
1436
1437 *dstUsedP = dstUsed;
1438}
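
/* Sketch of the inverse of sequentialise_CacheLine, assuming the
   CountedSVal encoding above: expand the (count, sval) runs back into
   one SVal per byte of address range.  The helper name
   'example_expand_csvals' is ours; the write-back and fetch routines
   below do this same job against the LineZ/LineF representations. */
#if 0
static void example_expand_csvals ( /*OUT*/SVal* dst, /* N_LINE_ARANGE */
                                    CountedSVal* src, Word srcUsed )
{
   Word i = 0, k, m;
   for (k = 0; k < srcUsed; k++) {
      for (m = src[k].count; m > 0; m--)
         dst[i++] = src[k].sval; /* replicate the run */
   }
   tl_assert(i == N_LINE_ARANGE); /* runs must cover the whole line */
}
#endif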
1439
1440/* Write the cacheline 'wix' to backing store. Where it ends up
1441 is determined by its tag field. */
1442static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1443{
1444 Word i, j, k, m;
1445 Addr tag;
1446 SecMap* sm;
1447 CacheLine* cl;
1448 LineZ* lineZ;
1449 LineF* lineF;
1450 Word zix, fix, csvalsUsed;
1451 CountedSVal csvals[N_LINE_ARANGE];
1452 SVal sv;
1453
1454 if (0)
1455 VG_(printf)("scache wback line %d\n", (Int)wix);
1456
1457 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1458
1459 tag = cache_shmem.tags0[wix];
1460 cl = &cache_shmem.lyns0[wix];
1461
1462 /* The cache line may have been invalidated; if so, ignore it. */
1463 if (!is_valid_scache_tag(tag))
1464 return;
1465
1466 /* Where are we going to put it? */
1467 sm = NULL;
1468 lineZ = NULL;
1469 lineF = NULL;
1470 zix = fix = -1;
1471
1472 /* find the Z line to write in and rcdec it or the associated F
1473 line. */
1474 find_Z_for_writing( &sm, &zix, tag );
1475
1476 tl_assert(sm);
1477 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1478 lineZ = &sm->linesZ[zix];
1479
1480 /* Generate the data to be stored */
sewardj8f5374e2008-12-07 11:40:17 +00001481 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001482 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1483
1484 csvalsUsed = -1;
1485 sequentialise_CacheLine( csvals, &csvalsUsed,
1486 N_LINE_ARANGE, cl );
1487 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1488 if (0) VG_(printf)("%ld ", csvalsUsed);
1489
1490 lineZ->dict[0] = lineZ->dict[1]
1491 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1492
1493 /* i indexes actual shadow values, k is cursor in csvals */
1494 i = 0;
1495 for (k = 0; k < csvalsUsed; k++) {
1496
1497 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001498 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001499 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1500 /* do we already have it? */
1501 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1502 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1503 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1504 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1505 /* no. look for a free slot. */
sewardj8f5374e2008-12-07 11:40:17 +00001506 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001507 tl_assert(sv != SVal_INVALID);
1508 if (lineZ->dict[0]
1509 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1510 if (lineZ->dict[1]
1511 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1512 if (lineZ->dict[2]
1513 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1514 if (lineZ->dict[3]
1515 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1516 break; /* we'll have to use the f rep */
1517 dict_ok:
1518 m = csvals[k].count;
1519 if (m == 8) {
1520 write_twobit_array( lineZ->ix2s, i+0, j );
1521 write_twobit_array( lineZ->ix2s, i+1, j );
1522 write_twobit_array( lineZ->ix2s, i+2, j );
1523 write_twobit_array( lineZ->ix2s, i+3, j );
1524 write_twobit_array( lineZ->ix2s, i+4, j );
1525 write_twobit_array( lineZ->ix2s, i+5, j );
1526 write_twobit_array( lineZ->ix2s, i+6, j );
1527 write_twobit_array( lineZ->ix2s, i+7, j );
1528 i += 8;
1529 }
1530 else if (m == 4) {
1531 write_twobit_array( lineZ->ix2s, i+0, j );
1532 write_twobit_array( lineZ->ix2s, i+1, j );
1533 write_twobit_array( lineZ->ix2s, i+2, j );
1534 write_twobit_array( lineZ->ix2s, i+3, j );
1535 i += 4;
1536 }
1537 else if (m == 1) {
1538 write_twobit_array( lineZ->ix2s, i+0, j );
1539 i += 1;
1540 }
1541 else if (m == 2) {
1542 write_twobit_array( lineZ->ix2s, i+0, j );
1543 write_twobit_array( lineZ->ix2s, i+1, j );
1544 i += 2;
1545 }
1546 else {
1547 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1548 }
1549
1550 }
1551
1552 if (LIKELY(i == N_LINE_ARANGE)) {
1553 /* Construction of the compressed representation was
1554 successful. */
1555 rcinc_LineZ(lineZ);
1556 stats__cache_Z_wbacks++;
1557 } else {
1558 /* Cannot use the compressed(z) representation. Use the full(f)
1559 rep instead. */
1560 tl_assert(i >= 0 && i < N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00001561 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
philippe71ed3c92015-05-17 19:32:42 +00001562 lineF = alloc_LineF_for_Z (lineZ);
sewardjf98e1c02008-10-25 16:22:41 +00001563 i = 0;
1564 for (k = 0; k < csvalsUsed; k++) {
sewardj8f5374e2008-12-07 11:40:17 +00001565 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001566 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1567 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001568 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001569 tl_assert(sv != SVal_INVALID);
1570 for (m = csvals[k].count; m > 0; m--) {
1571 lineF->w64s[i] = sv;
1572 i++;
1573 }
1574 }
1575 tl_assert(i == N_LINE_ARANGE);
1576 rcinc_LineF(lineF);
1577 stats__cache_F_wbacks++;
1578 }
sewardjf98e1c02008-10-25 16:22:41 +00001579}
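
/* The Z representation just built is a 4-entry dictionary plus a
   2-bit index per byte: with N_LINE_ARANGE == 64 that is 4 SVals and
   16 bytes of indexes per line.  A round-trip sketch using the file's
   own two-bit array helpers ('example_twobit_roundtrip' is ours): */
#if 0
static void example_twobit_roundtrip ( LineZ* lineZ )
{
   Word i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      write_twobit_array( lineZ->ix2s, i, i & 3 );
   for (i = 0; i < N_LINE_ARANGE; i++)
      tl_assert( read_twobit_array( lineZ->ix2s, i ) == (UWord)(i & 3) );
}
#endif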
1580
1581/* Fetch the cacheline 'wix' from the backing store. The tag
1582 associated with 'wix' is assumed to have already been filled in;
1583 hence that is used to determine where in the backing store to read
1584 from. */
1585static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1586{
1587 Word i;
1588 Addr tag;
1589 CacheLine* cl;
1590 LineZ* lineZ;
1591 LineF* lineF;
1592
1593 if (0)
1594 VG_(printf)("scache fetch line %d\n", (Int)wix);
1595
1596 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1597
1598 tag = cache_shmem.tags0[wix];
1599 cl = &cache_shmem.lyns0[wix];
1600
1601 /* reject nonsense requests */
1602 tl_assert(is_valid_scache_tag(tag));
1603
1604 lineZ = NULL;
1605 lineF = NULL;
1606 find_ZF_for_reading( &lineZ, &lineF, tag );
1607 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1608
1609 /* expand the data into the bottom layer of the tree, then get
1610 cacheline_normalise to build the descriptor array. */
1611 if (lineF) {
sewardjf98e1c02008-10-25 16:22:41 +00001612 for (i = 0; i < N_LINE_ARANGE; i++) {
1613 cl->svals[i] = lineF->w64s[i];
1614 }
1615 stats__cache_F_fetches++;
1616 } else {
1617 for (i = 0; i < N_LINE_ARANGE; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00001618 UWord ix = read_twobit_array( lineZ->ix2s, i );
philippe1475a7f2015-05-11 19:45:08 +00001619 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1620 cl->svals[i] = lineZ->dict[ix];
1621 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00001622 }
1623 stats__cache_Z_fetches++;
1624 }
1625 normalise_CacheLine( cl );
1626}
1627
philippe8939e092015-05-11 20:18:10 +00001628/* Invalidate the cachelines corresponding to the given range, which
1629 must start and end on a cacheline boundary. */
philippef54cb662015-05-10 22:19:31 +00001630static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1631{
philippef54cb662015-05-10 22:19:31 +00001632 Word wix;
1633
philippe8939e092015-05-11 20:18:10 +00001634 /* ga must be on a cacheline boundary. */
1635 tl_assert (is_valid_scache_tag (ga));
1636 /* szB must be a multiple of cacheline size. */
1637 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1638
1639
philippef54cb662015-05-10 22:19:31 +00001640 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1641 Word nwix = szB / N_LINE_ARANGE;
1642
1643 if (nwix > N_WAY_NENT)
1644 nwix = N_WAY_NENT; // no need to check the same entry several times.
1645
1646 for (wix = 0; wix < nwix; wix++) {
1647 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1648 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1649 ga_ix++;
philippe364f0bb2015-05-15 09:38:54 +00001650 if (UNLIKELY(ga_ix == N_WAY_NENT))
philippef54cb662015-05-10 22:19:31 +00001651 ga_ix = 0;
1652 }
sewardjf98e1c02008-10-25 16:22:41 +00001653}
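
/* Why one wrapping pass suffices: addresses N_LINE_ARANGE apart map
   to consecutive cache entries (mod N_WAY_NENT), so the lines of
   [ga, ga+szB) can only occupy the min(szB/N_LINE_ARANGE, N_WAY_NENT)
   entries visited above, and the address_in_range check weeds out
   unrelated occupants of those entries. */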
1654
philippef54cb662015-05-10 22:19:31 +00001655
sewardjf98e1c02008-10-25 16:22:41 +00001656static void shmem__flush_and_invalidate_scache ( void ) {
1657 Word wix;
1658 Addr tag;
1659 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1660 tl_assert(!is_valid_scache_tag(1));
1661 for (wix = 0; wix < N_WAY_NENT; wix++) {
1662 tag = cache_shmem.tags0[wix];
1663 if (tag == 1/*INVALID*/) {
1664 /* already invalid; nothing to do */
1665 } else {
1666 tl_assert(is_valid_scache_tag(tag));
1667 cacheline_wback( wix );
1668 }
1669 cache_shmem.tags0[wix] = 1/*INVALID*/;
1670 }
philippef54cb662015-05-10 22:19:31 +00001671 stats__cache_flushes_invals++;
sewardjf98e1c02008-10-25 16:22:41 +00001672}
1673
1674
1675static inline Bool aligned16 ( Addr a ) {
1676 return 0 == (a & 1);
1677}
1678static inline Bool aligned32 ( Addr a ) {
1679 return 0 == (a & 3);
1680}
1681static inline Bool aligned64 ( Addr a ) {
1682 return 0 == (a & 7);
1683}
1684static inline UWord get_cacheline_offset ( Addr a ) {
1685 return (UWord)(a & (N_LINE_ARANGE - 1));
1686}
1687static inline Addr cacheline_ROUNDUP ( Addr a ) {
1688 return ROUNDUP(a, N_LINE_ARANGE);
1689}
1690static inline Addr cacheline_ROUNDDN ( Addr a ) {
1691 return ROUNDDN(a, N_LINE_ARANGE);
1692}
1693static inline UWord get_treeno ( Addr a ) {
1694 return get_cacheline_offset(a) >> 3;
1695}
1696static inline UWord get_tree_offset ( Addr a ) {
1697 return a & 7;
1698}
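
/* A sketch of the address decomposition these helpers perform,
   assuming N_LINE_ARANGE == 64 (so N_LINE_BITS == 6); the function
   name is ours and purely illustrative. */
#if 0
static void example_addr_decomposition ( void )
{
   Addr a = 0x1003A;
   tl_assert( get_cacheline_offset(a) == 0x3A );
   tl_assert( get_treeno(a) == 7 ); /* byte 0x3A is in the 8th tree */
   tl_assert( get_tree_offset(a) == 2 ); /* 3rd byte within that tree */
   tl_assert( cacheline_ROUNDDN(a) == 0x10000 );
   tl_assert( cacheline_ROUNDUP(a) == 0x10040 );
}
#endif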
1699
1700static __attribute__((noinline))
1701 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1702static inline CacheLine* get_cacheline ( Addr a )
1703{
1704 /* tag is 'a' with the in-line offset masked out,
1705 i.e. 'a' with its low N_LINE_BITS bits cleared */
1706 Addr tag = a & ~(N_LINE_ARANGE - 1);
1707 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1708 stats__cache_totrefs++;
1709 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1710 return &cache_shmem.lyns0[wix];
1711 } else {
1712 return get_cacheline_MISS( a );
1713 }
1714}
1715
1716static __attribute__((noinline))
1717 CacheLine* get_cacheline_MISS ( Addr a )
1718{
1719 /* tag is 'a' with the in-line offset masked out,
1720 i.e. 'a' with its low N_LINE_BITS bits cleared */
1721
1722 CacheLine* cl;
1723 Addr* tag_old_p;
1724 Addr tag = a & ~(N_LINE_ARANGE - 1);
1725 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1726
1727 tl_assert(tag != cache_shmem.tags0[wix]);
1728
1729 /* Dump the old line into the backing store. */
1730 stats__cache_totmisses++;
1731
1732 cl = &cache_shmem.lyns0[wix];
1733 tag_old_p = &cache_shmem.tags0[wix];
1734
1735 if (is_valid_scache_tag( *tag_old_p )) {
1736 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001737 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001738 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1739 cacheline_wback( wix );
1740 }
1741 /* and reload the new one */
1742 *tag_old_p = tag;
1743 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001744 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001745 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1746 return cl;
1747}
1748
1749static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1750 stats__cline_64to32pulldown++;
1751 switch (toff) {
1752 case 0: case 4:
1753 tl_assert(descr & TREE_DESCR_64);
1754 tree[4] = tree[0];
1755 descr &= ~TREE_DESCR_64;
1756 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1757 break;
1758 default:
1759 tl_assert(0);
1760 }
1761 return descr;
1762}
1763
1764static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1765 stats__cline_32to16pulldown++;
1766 switch (toff) {
1767 case 0: case 2:
1768 if (!(descr & TREE_DESCR_32_0)) {
1769 descr = pulldown_to_32(tree, 0, descr);
1770 }
1771 tl_assert(descr & TREE_DESCR_32_0);
1772 tree[2] = tree[0];
1773 descr &= ~TREE_DESCR_32_0;
1774 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1775 break;
1776 case 4: case 6:
1777 if (!(descr & TREE_DESCR_32_1)) {
1778 descr = pulldown_to_32(tree, 4, descr);
1779 }
1780 tl_assert(descr & TREE_DESCR_32_1);
1781 tree[6] = tree[4];
1782 descr &= ~TREE_DESCR_32_1;
1783 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1784 break;
1785 default:
1786 tl_assert(0);
1787 }
1788 return descr;
1789}
1790
1791static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1792 stats__cline_16to8pulldown++;
1793 switch (toff) {
1794 case 0: case 1:
1795 if (!(descr & TREE_DESCR_16_0)) {
1796 descr = pulldown_to_16(tree, 0, descr);
1797 }
1798 tl_assert(descr & TREE_DESCR_16_0);
1799 tree[1] = tree[0];
1800 descr &= ~TREE_DESCR_16_0;
1801 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1802 break;
1803 case 2: case 3:
1804 if (!(descr & TREE_DESCR_16_1)) {
1805 descr = pulldown_to_16(tree, 2, descr);
1806 }
1807 tl_assert(descr & TREE_DESCR_16_1);
1808 tree[3] = tree[2];
1809 descr &= ~TREE_DESCR_16_1;
1810 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1811 break;
1812 case 4: case 5:
1813 if (!(descr & TREE_DESCR_16_2)) {
1814 descr = pulldown_to_16(tree, 4, descr);
1815 }
1816 tl_assert(descr & TREE_DESCR_16_2);
1817 tree[5] = tree[4];
1818 descr &= ~TREE_DESCR_16_2;
1819 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1820 break;
1821 case 6: case 7:
1822 if (!(descr & TREE_DESCR_16_3)) {
1823 descr = pulldown_to_16(tree, 6, descr);
1824 }
1825 tl_assert(descr & TREE_DESCR_16_3);
1826 tree[7] = tree[6];
1827 descr &= ~TREE_DESCR_16_3;
1828 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1829 break;
1830 default:
1831 tl_assert(0);
1832 }
1833 return descr;
1834}
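
/* Pulldown in action -- a sketch ('example_pulldown' is ours): a
   1-byte access at offset 5 of a fully collapsed tree forces three
   successive splits, 64 -> 32s, then 32_1 -> 16s, then 16_2 -> 8s. */
#if 0
static void example_pulldown ( SVal v )
{
   SVal tree[8];
   Word i;
   UShort descr = TREE_DESCR_64;
   tree[0] = v;
   for (i = 1; i < 8; i++) tree[i] = SVal_INVALID;
   descr = pulldown_to_8( tree, 5, descr );
   tl_assert( descr == (TREE_DESCR_32_0 | TREE_DESCR_16_3
                        | TREE_DESCR_8_5 | TREE_DESCR_8_4) );
   /* the value has been copied down into the split-off nodes */
   tl_assert( tree[4] == v && tree[5] == v );
}
#endif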
1835
1836
1837static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1838 UShort mask;
1839 switch (toff) {
1840 case 0:
1841 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1842 tl_assert( (descr & mask) == mask );
1843 descr &= ~mask;
1844 descr |= TREE_DESCR_16_0;
1845 break;
1846 case 2:
1847 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1848 tl_assert( (descr & mask) == mask );
1849 descr &= ~mask;
1850 descr |= TREE_DESCR_16_1;
1851 break;
1852 case 4:
1853 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1854 tl_assert( (descr & mask) == mask );
1855 descr &= ~mask;
1856 descr |= TREE_DESCR_16_2;
1857 break;
1858 case 6:
1859 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1860 tl_assert( (descr & mask) == mask );
1861 descr &= ~mask;
1862 descr |= TREE_DESCR_16_3;
1863 break;
1864 default:
1865 tl_assert(0);
1866 }
1867 return descr;
1868}
1869
1870static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1871 UShort mask;
1872 switch (toff) {
1873 case 0:
1874 if (!(descr & TREE_DESCR_16_0))
1875 descr = pullup_descr_to_16(descr, 0);
1876 if (!(descr & TREE_DESCR_16_1))
1877 descr = pullup_descr_to_16(descr, 2);
1878 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1879 tl_assert( (descr & mask) == mask );
1880 descr &= ~mask;
1881 descr |= TREE_DESCR_32_0;
1882 break;
1883 case 4:
1884 if (!(descr & TREE_DESCR_16_2))
1885 descr = pullup_descr_to_16(descr, 4);
1886 if (!(descr & TREE_DESCR_16_3))
1887 descr = pullup_descr_to_16(descr, 6);
1888 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1889 tl_assert( (descr & mask) == mask );
1890 descr &= ~mask;
1891 descr |= TREE_DESCR_32_1;
1892 break;
1893 default:
1894 tl_assert(0);
1895 }
1896 return descr;
1897}
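
/* Note the asymmetry with the pulldown_* family: pullup rewrites only
   the descriptor, never the tree, since callers use it when they are
   about to overwrite the whole coarser range and the stale leaves no
   longer matter.  A sketch ('example_pullup' is ours): */
#if 0
static void example_pullup ( void )
{
   UShort descr = TREE_DESCR_8_3 | TREE_DESCR_8_2
                  | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   descr = pullup_descr_to_32( descr, 0 ); /* 8s -> 16s -> 32_0 */
   tl_assert( descr == TREE_DESCR_32_0 );
}
#endif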
1898
1899static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1900 switch (toff) {
1901 case 0: case 4:
1902 return 0 != (descr & TREE_DESCR_64);
1903 default:
1904 tl_assert(0);
1905 }
1906}
1907
1908static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1909 switch (toff) {
1910 case 0:
1911 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1912 case 2:
1913 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1914 case 4:
1915 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1916 case 6:
1917 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1918 default:
1919 tl_assert(0);
1920 }
1921}
1922
1923/* ------------ Cache management ------------ */
1924
1925static void zsm_flush_cache ( void )
1926{
1927 shmem__flush_and_invalidate_scache();
1928}
1929
1930
philippe1475a7f2015-05-11 19:45:08 +00001931static void zsm_init ( void )
sewardjf98e1c02008-10-25 16:22:41 +00001932{
1933 tl_assert( sizeof(UWord) == sizeof(Addr) );
1934
sewardjf98e1c02008-10-25 16:22:41 +00001935 tl_assert(map_shmem == NULL);
1936 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1937 HG_(free),
1938 NULL/*unboxed UWord cmp*/);
philippef54cb662015-05-10 22:19:31 +00001939 /* Invalidate all cache entries. */
1940 tl_assert(!is_valid_scache_tag(1));
1941 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1942 cache_shmem.tags0[wix] = 1/*INVALID*/;
1943 }
sewardjf98e1c02008-10-25 16:22:41 +00001944
philippe71ed3c92015-05-17 19:32:42 +00001945 LineF_pool_allocator = VG_(newPA) (
1946 sizeof(LineF),
1947 /* Nr elements/pool to fill a core arena block
1948 taking some arena overhead into account. */
1949 (4 * 1024 * 1024 - 200)/sizeof(LineF),
1950 HG_(zalloc),
1951 "libhb.LineF_storage.pool",
1952 HG_(free)
1953 );
1954
sewardjf98e1c02008-10-25 16:22:41 +00001955 /* a SecMap must contain an integral number of CacheLines */
1956 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1957 /* also ... a CacheLine holds an integral number of trees */
1958 tl_assert(0 == (N_LINE_ARANGE % 8));
1959}
1960
1961/////////////////////////////////////////////////////////////////
1962/////////////////////////////////////////////////////////////////
1963// //
1964// SECTION END compressed shadow memory //
1965// //
1966/////////////////////////////////////////////////////////////////
1967/////////////////////////////////////////////////////////////////
1968
1969
1970
1971/////////////////////////////////////////////////////////////////
1972/////////////////////////////////////////////////////////////////
1973// //
1974// SECTION BEGIN vts primitives //
1975// //
1976/////////////////////////////////////////////////////////////////
1977/////////////////////////////////////////////////////////////////
1978
sewardjf98e1c02008-10-25 16:22:41 +00001979
sewardje4cce742011-02-24 15:25:24 +00001980/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1981 being compact stand-ins for Thr*'s. Use these functions to map
1982 between them. */
1983static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1984static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1985
sewardje4cce742011-02-24 15:25:24 +00001986__attribute__((noreturn))
1987static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1988{
1989 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00001990 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001991 "\n"
1992 "Helgrind: cannot continue, run aborted: too many threads.\n"
1993 "Sorry. Helgrind can only handle programs that create\n"
1994 "%'llu or fewer threads over their entire lifetime.\n"
1995 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001996 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001997 } else {
florian6bf37262012-10-21 03:23:36 +00001998 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001999 "\n"
2000 "Helgrind: cannot continue, run aborted: too many\n"
2001 "synchronisation events. Sorry. Helgrind can only handle\n"
2002 "programs which perform %'llu or fewer\n"
2003 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2004 "\n";
2005 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2006 }
2007 VG_(exit)(1);
2008 /*NOTREACHED*/
2009 tl_assert(0); /*wtf?!*/
2010}
2011
2012
philippec3508652015-03-28 12:01:58 +00002013/* The dead thread (ThrID, actually) tables. A thread may only be
sewardjffce8152011-06-24 10:09:41 +00002014 listed here if we have been notified thereof by libhb_async_exit.
2015 New entries are added at the end. The order isn't important, but
philippec3508652015-03-28 12:01:58 +00002016 the ThrID values must be unique.
2017 verydead_thread_table_not_pruned lists the identity of the threads
2018 that died since the previous round of pruning.
2019 Once pruning is done, these ThrID are added in verydead_thread_table.
2020 We don't actually need to keep the set of threads that have ever died --
sewardjffce8152011-06-24 10:09:41 +00002021 only the threads that have died since the previous round of
2022 pruning. But it's useful for sanity check purposes to keep the
2023 entire set, so we do. */
philippec3508652015-03-28 12:01:58 +00002024static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
sewardjffce8152011-06-24 10:09:41 +00002025static XArray* /* of ThrID */ verydead_thread_table = NULL;
2026
2027/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00002028static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2029 ThrID id1 = *(const ThrID*)v1;
2030 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00002031 if (id1 < id2) return -1;
2032 if (id1 > id2) return 1;
2033 return 0;
2034}
2035
philippec3508652015-03-28 12:01:58 +00002036static void verydead_thread_tables_init ( void )
sewardjffce8152011-06-24 10:09:41 +00002037{
2038 tl_assert(!verydead_thread_table);
philippec3508652015-03-28 12:01:58 +00002039 tl_assert(!verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002040 verydead_thread_table
2041 = VG_(newXA)( HG_(zalloc),
2042 "libhb.verydead_thread_table_init.1",
2043 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00002044 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
philippec3508652015-03-28 12:01:58 +00002045 verydead_thread_table_not_pruned
2046 = VG_(newXA)( HG_(zalloc),
2047 "libhb.verydead_thread_table_init.2",
2048 HG_(free), sizeof(ThrID) );
2049 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
sewardjffce8152011-06-24 10:09:41 +00002050}
2051
philippec3508652015-03-28 12:01:58 +00002052static void verydead_thread_table_sort_and_check (XArray* thrids)
2053{
2054 UWord i;
2055
2056 VG_(sortXA)( thrids );
2057 /* Sanity check: check for unique ThrID values. */
2058 UWord nBT = VG_(sizeXA)( thrids );
2059 if (nBT > 0) {
2060 ThrID thrid1, thrid2;
2061 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2062 for (i = 1; i < nBT; i++) {
2063 thrid1 = thrid2;
2064 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2065 tl_assert(thrid1 < thrid2);
2066 }
2067 }
2068 /* Ok, so the dead thread table thrids has unique and in-order keys. */
2069}
sewardjf98e1c02008-10-25 16:22:41 +00002070
2071/* A VTS contains .ts, its vector clock, and also .id, a field to hold
2072 a backlink for the caller's convenience. Since we have no idea
2073 what to set that to in the library, it always gets set to
2074 VtsID_INVALID. */
2075typedef
2076 struct {
sewardj7aa38a92011-02-27 23:04:12 +00002077 VtsID id;
2078 UInt usedTS;
2079 UInt sizeTS;
2080 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00002081 }
2082 VTS;
2083
sewardj7aa38a92011-02-27 23:04:12 +00002084/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00002085static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00002086
sewardjffce8152011-06-24 10:09:41 +00002087/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00002088 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00002089static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002090
sewardjffce8152011-06-24 10:09:41 +00002091/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2092 array is sized exactly to hold the number of required elements.
2093 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2094 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00002095static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00002096
sewardjf98e1c02008-10-25 16:22:41 +00002097/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00002098static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002099
sewardj7aa38a92011-02-27 23:04:12 +00002100/* Create a new singleton VTS in 'out'. Caller must have
2101 pre-allocated 'out' sufficiently big to hold the result in all
2102 possible cases. */
2103static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00002104
sewardj7aa38a92011-02-27 23:04:12 +00002105/* Create in 'out' a VTS which is the same as 'vts' except with
2106 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2107 sufficiently big to hold the result in all possible cases. */
2108static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002109
sewardj7aa38a92011-02-27 23:04:12 +00002110/* Create in 'out' a VTS which is the join (max) of 'a' and
2111 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2112 the result in all possible cases. */
2113static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002114
sewardj23f12002009-07-24 08:45:08 +00002115/* Compute the partial ordering relation of the two args. Although we
2116 could be completely general and return an enumeration value (EQ,
2117 LT, GT, UN), in fact we only need LEQ, and so we may as well
2118 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00002119
sewardje4cce742011-02-24 15:25:24 +00002120 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2121 invalid ThrID). In the latter case, the returned ThrID indicates
2122 the discovered point for which they are not. There may be more
2123 than one such point, but we only care about seeing one of them, not
2124 all of them. This rather strange convention is used because
2125 sometimes we want to know the actual index at which they first
2126 differ. */
2127static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002128
2129/* Compute an arbitrary structural (total) ordering on the two args,
2130 based on their VCs, so they can be looked up in a table, tree, etc.
2131 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00002132static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002133
florianb28fe892014-10-28 20:52:07 +00002134/* Debugging only. Display the given VTS. */
2135static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002136
2137/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00002138static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002139
sewardjffce8152011-06-24 10:09:41 +00002140/* Notify the VTS machinery that a thread has been declared
2141 comprehensively dead: that is, it has done an async exit AND it has
2142 been joined with. This should ensure that its local clocks (.viR
2143 and .viW) will never again change, and so all mentions of this
2144 thread from all VTSs in the system may be removed. */
2145static void VTS__declare_thread_very_dead ( Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002146
2147/*--------------- to do with Vector Timestamps ---------------*/
2148
sewardjf98e1c02008-10-25 16:22:41 +00002149static Bool is_sane_VTS ( VTS* vts )
2150{
2151 UWord i, n;
2152 ScalarTS *st1, *st2;
2153 if (!vts) return False;
sewardj555fc572011-02-27 23:39:53 +00002154 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00002155 n = vts->usedTS;
2156 if (n == 1) {
2157 st1 = &vts->ts[0];
2158 if (st1->tym == 0)
2159 return False;
2160 }
2161 else
sewardjf98e1c02008-10-25 16:22:41 +00002162 if (n >= 2) {
2163 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002164 st1 = &vts->ts[i];
2165 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00002166 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002167 return False;
2168 if (st1->tym == 0 || st2->tym == 0)
2169 return False;
2170 }
2171 }
2172 return True;
2173}
2174
2175
sewardj7aa38a92011-02-27 23:04:12 +00002176/* Create a new, empty VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002177*/
florian6bd9dc12012-11-23 16:17:43 +00002178static VTS* VTS__new ( const HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00002179{
sewardj7aa38a92011-02-27 23:04:12 +00002180 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2181 tl_assert(vts->usedTS == 0);
2182 vts->sizeTS = sizeTS;
2183 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00002184 return vts;
2185}
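
/* The word written at &vts->ts[sizeTS] is a canary: VTS__new
   allocates one spare ScalarTS slot and plants 0x0ddC0ffeeBadF00dULL
   in it, and the clone/subtract/delete routines below re-check it,
   cheaply catching any overrun of the ScalarTS array. */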
2186
sewardj7aa38a92011-02-27 23:04:12 +00002187/* Clone this VTS.
2188*/
florian6bd9dc12012-11-23 16:17:43 +00002189static VTS* VTS__clone ( const HChar* who, VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002190{
2191 tl_assert(vts);
2192 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2193 UInt nTS = vts->usedTS;
2194 VTS* clone = VTS__new(who, nTS);
2195 clone->id = vts->id;
2196 clone->sizeTS = nTS;
2197 clone->usedTS = nTS;
2198 UInt i;
2199 for (i = 0; i < nTS; i++) {
2200 clone->ts[i] = vts->ts[i];
2201 }
2202 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2203 return clone;
2204}
2205
sewardjf98e1c02008-10-25 16:22:41 +00002206
sewardjffce8152011-06-24 10:09:41 +00002207/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2208 must be in strictly increasing order. We could obviously do this
2209 much more efficiently (in linear time) if necessary.
2210*/
florian6bd9dc12012-11-23 16:17:43 +00002211static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
sewardjffce8152011-06-24 10:09:41 +00002212{
2213 UInt i, j;
2214 tl_assert(vts);
2215 tl_assert(thridsToDel);
2216 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2217 UInt nTS = vts->usedTS;
2218 /* Figure out how many ScalarTSs will remain in the output. */
2219 UInt nReq = nTS;
2220 for (i = 0; i < nTS; i++) {
2221 ThrID thrid = vts->ts[i].thrid;
2222 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2223 nReq--;
2224 }
2225 tl_assert(nReq <= nTS);
2226 /* Copy the ones that will remain. */
2227 VTS* res = VTS__new(who, nReq);
2228 j = 0;
2229 for (i = 0; i < nTS; i++) {
2230 ThrID thrid = vts->ts[i].thrid;
2231 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2232 continue;
2233 res->ts[j++] = vts->ts[i];
2234 }
2235 tl_assert(j == nReq);
2236 tl_assert(j == res->sizeTS);
2237 res->usedTS = j;
2238 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2239 return res;
2240}
2241
2242
sewardjf98e1c02008-10-25 16:22:41 +00002243/* Delete this VTS in its entirety.
2244*/
sewardj7aa38a92011-02-27 23:04:12 +00002245static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002246{
2247 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002248 tl_assert(vts->usedTS <= vts->sizeTS);
2249 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002250 HG_(free)(vts);
2251}
2252
2253
2254/* Create a new singleton VTS.
2255*/
sewardj7aa38a92011-02-27 23:04:12 +00002256static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2257{
sewardjf98e1c02008-10-25 16:22:41 +00002258 tl_assert(thr);
2259 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002260 tl_assert(out);
2261 tl_assert(out->usedTS == 0);
2262 tl_assert(out->sizeTS >= 1);
2263 UInt hi = out->usedTS++;
2264 out->ts[hi].thrid = Thr__to_ThrID(thr);
2265 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002266}
2267
2268
2269/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
2270 not modified.
2271*/
sewardj7aa38a92011-02-27 23:04:12 +00002272static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002273{
sewardj7aa38a92011-02-27 23:04:12 +00002274 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002275 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002276 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002277
2278 stats__vts__tick++;
2279
sewardj7aa38a92011-02-27 23:04:12 +00002280 tl_assert(out);
2281 tl_assert(out->usedTS == 0);
2282 if (vts->usedTS >= ThrID_MAX_VALID)
2283 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2284 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2285
sewardjf98e1c02008-10-25 16:22:41 +00002286 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002287 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002288 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002289 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002290
sewardj555fc572011-02-27 23:39:53 +00002291 /* Copy all entries which precede 'me'. */
2292 for (i = 0; i < n; i++) {
2293 ScalarTS* here = &vts->ts[i];
2294 if (UNLIKELY(here->thrid >= me_thrid))
2295 break;
2296 UInt hi = out->usedTS++;
2297 out->ts[hi] = *here;
2298 }
2299
2300 /* 'i' now indicates the next entry to copy, if any.
2301 There are 3 possibilities:
2302 (a) there is no next entry (we used them all up already):
2303 add (me_thrid,1) to the output, and quit
2304 (b) there is a next entry, and its thrid > me_thrid:
2305 add (me_thrid,1) to the output, then copy the remaining entries
2306 (c) there is a next entry, and its thrid == me_thrid:
2307 copy it to the output but increment its timestamp value.
2308 Then copy the remaining entries. (c) is the common case.
2309 */
2310 tl_assert(i >= 0 && i <= n);
2311 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002312 UInt hi = out->usedTS++;
2313 out->ts[hi].thrid = me_thrid;
2314 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002315 } else {
2316 /* cases (b) and (c) */
2317 ScalarTS* here = &vts->ts[i];
2318 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002319 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002320 /* We're hosed. We have to stop. */
2321 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2322 }
sewardj7aa38a92011-02-27 23:04:12 +00002323 UInt hi = out->usedTS++;
2324 out->ts[hi].thrid = here->thrid;
2325 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002326 i++;
sewardj555fc572011-02-27 23:39:53 +00002327 found = True;
2328 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002329 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002330 out->ts[hi].thrid = me_thrid;
2331 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002332 }
sewardj555fc572011-02-27 23:39:53 +00002333 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002334 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002335 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002336 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002337 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002338 }
2339 }
sewardj555fc572011-02-27 23:39:53 +00002340
sewardj7aa38a92011-02-27 23:04:12 +00002341 tl_assert(is_sane_VTS(out));
2342 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2343 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002344}
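
/* Tick, in vector-clock notation (ThrIDs written T1..Tn):
   tick(T5, [T1:3, T5:7]) = [T1:3, T5:8] -- case (c) above --
   while tick(T2, [T1:3, T5:7]) = [T1:3, T2:1, T5:7] -- case (b),
   a fresh entry with timestamp 1 spliced in at its sorted position. */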
2345
2346
2347/* Return a new VTS constructed as the join (max) of the 2 args.
2348 Neither arg is modified.
2349*/
sewardj7aa38a92011-02-27 23:04:12 +00002350static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002351{
sewardj7aa38a92011-02-27 23:04:12 +00002352 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002353 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002354 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002355 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002356
sewardjc8028ad2010-05-05 09:34:42 +00002357 stats__vts__join++;
2358
sewardj7aa38a92011-02-27 23:04:12 +00002359 tl_assert(a);
2360 tl_assert(b);
2361 useda = a->usedTS;
2362 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002363
sewardj7aa38a92011-02-27 23:04:12 +00002364 tl_assert(out);
2365 tl_assert(out->usedTS == 0);
2366 /* overly conservative test, but doing better involves comparing
2367 the two VTSs, which we don't want to do at this point. */
2368 if (useda + usedb >= ThrID_MAX_VALID)
2369 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2370 tl_assert(out->sizeTS >= useda + usedb);
2371
sewardjf98e1c02008-10-25 16:22:41 +00002372 ia = ib = 0;
2373
2374 while (1) {
2375
sewardje4cce742011-02-24 15:25:24 +00002376 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2377 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002378 occurring in either a or b, and tyma/b are the relevant
2379 scalar timestamps, taking into account implicit zeroes. */
2380 tl_assert(ia >= 0 && ia <= useda);
2381 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002382
njn4c245e52009-03-15 23:25:38 +00002383 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002384 /* both empty - done */
2385 break;
njn4c245e52009-03-15 23:25:38 +00002386
2387 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002388 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002389 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002390 thrid = tmpb->thrid;
2391 tyma = 0;
2392 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002393 ib++;
njn4c245e52009-03-15 23:25:38 +00002394
2395 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002396 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002397 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002398 thrid = tmpa->thrid;
2399 tyma = tmpa->tym;
2400 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002401 ia++;
njn4c245e52009-03-15 23:25:38 +00002402
2403 } else {
sewardje4cce742011-02-24 15:25:24 +00002404 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002405 ScalarTS* tmpa = &a->ts[ia];
2406 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002407 if (tmpa->thrid < tmpb->thrid) {
2408 /* a has the lowest unconsidered ThrID */
2409 thrid = tmpa->thrid;
2410 tyma = tmpa->tym;
2411 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002412 ia++;
sewardje4cce742011-02-24 15:25:24 +00002413 } else if (tmpa->thrid > tmpb->thrid) {
2414 /* b has the lowest unconsidered ThrID */
2415 thrid = tmpb->thrid;
2416 tyma = 0;
2417 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002418 ib++;
2419 } else {
sewardje4cce742011-02-24 15:25:24 +00002420 /* they both next mention the same ThrID */
2421 tl_assert(tmpa->thrid == tmpb->thrid);
2422 thrid = tmpa->thrid; /* == tmpb->thrid */
2423 tyma = tmpa->tym;
2424 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002425 ia++;
2426 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002427 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002428 }
2429 }
2430
2431 /* having laboriously determined (thr, tyma, tymb), do something
2432 useful with it. */
2433 tymMax = tyma > tymb ? tyma : tymb;
2434 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002435 UInt hi = out->usedTS++;
2436 out->ts[hi].thrid = thrid;
2437 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002438 }
2439
2440 }
2441
sewardj7aa38a92011-02-27 23:04:12 +00002442 tl_assert(is_sane_VTS(out));
2443 tl_assert(out->usedTS <= out->sizeTS);
2444 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002445}
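
/* Join, in the same notation: join([T1:3, T2:5], [T2:4, T7:1])
   = [T1:3, T2:5, T7:1], an index-wise max over the union of the two
   ThrID sets with absent entries treated as zero; here ncommon == 1,
   since only T2 appears in both inputs. */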
2446
2447
sewardje4cce742011-02-24 15:25:24 +00002448/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2449 they are, or the first ThrID for which they are not (no valid ThrID
2450 has the value zero). This rather strange convention is used
2451 because sometimes we want to know the actual index at which they
2452 first differ. */
2453static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002454{
sewardj23f12002009-07-24 08:45:08 +00002455 Word ia, ib, useda, usedb;
2456 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002457
sewardjc8028ad2010-05-05 09:34:42 +00002458 stats__vts__cmpLEQ++;
2459
sewardj7aa38a92011-02-27 23:04:12 +00002460 tl_assert(a);
2461 tl_assert(b);
2462 useda = a->usedTS;
2463 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002464
2465 ia = ib = 0;
2466
2467 while (1) {
2468
njn4c245e52009-03-15 23:25:38 +00002469 /* This logic is to enumerate doubles (tyma, tymb) drawn
2470 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002471 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002472 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002473
sewardjf98e1c02008-10-25 16:22:41 +00002474 tl_assert(ia >= 0 && ia <= useda);
2475 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002476
njn4c245e52009-03-15 23:25:38 +00002477 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002478 /* both empty - done */
2479 break;
njn4c245e52009-03-15 23:25:38 +00002480
2481 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002482 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002483 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002484 tyma = 0;
2485 tymb = tmpb->tym;
2486 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002487 ib++;
njn4c245e52009-03-15 23:25:38 +00002488
2489 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002490 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002491 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002492 tyma = tmpa->tym;
2493 thrid = tmpa->thrid;
2494 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002495 ia++;
njn4c245e52009-03-15 23:25:38 +00002496
2497 } else {
sewardje4cce742011-02-24 15:25:24 +00002498 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002499 ScalarTS* tmpa = &a->ts[ia];
2500 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002501 if (tmpa->thrid < tmpb->thrid) {
2502 /* a has the lowest unconsidered ThrID */
2503 tyma = tmpa->tym;
2504 thrid = tmpa->thrid;
2505 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002506 ia++;
2507 }
2508 else
sewardje4cce742011-02-24 15:25:24 +00002509 if (tmpa->thrid > tmpb->thrid) {
2510 /* b has the lowest unconsidered ThrID */
2511 tyma = 0;
2512 tymb = tmpb->tym;
2513 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002514 ib++;
2515 } else {
sewardje4cce742011-02-24 15:25:24 +00002516 /* they both next mention the same ThrID */
2517 tl_assert(tmpa->thrid == tmpb->thrid);
2518 tyma = tmpa->tym;
2519 thrid = tmpa->thrid;
2520 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002521 ia++;
2522 ib++;
2523 }
2524 }
2525
njn4c245e52009-03-15 23:25:38 +00002526 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002527 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002528 if (tyma > tymb) {
2529 /* not LEQ at this index. Quit, since the answer is
2530 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002531 tl_assert(thrid >= 1024);
2532 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002533 }
sewardjf98e1c02008-10-25 16:22:41 +00002534 }
2535
sewardje4cce742011-02-24 15:25:24 +00002536 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002537}
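
/* cmpLEQ, in the same notation: LEQ([T1:3], [T1:4, T2:1]) holds and
   0 is returned, whereas [T1:3, T2:2] vs [T1:4] fails at T2
   (2 > implicit 0) and T2's ThrID comes back.  Happens-before
   queries elsewhere in the library reduce to exactly this pointwise
   comparison. */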
2538
2539
2540/* Compute an arbitrary structural (total) ordering on the two args,
2541 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002542 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2543 performance critical so there is some effort expended to make it as
2544 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002545*/
2546Word VTS__cmp_structural ( VTS* a, VTS* b )
2547{
2548 /* We just need to generate an arbitrary total ordering based on
2549 a->ts and b->ts. Preferably do it in a way which comes across likely
2550 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002551 Word i;
2552 Word useda = 0, usedb = 0;
2553 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002554
sewardjc8028ad2010-05-05 09:34:42 +00002555 stats__vts__cmp_structural++;
2556
2557 tl_assert(a);
2558 tl_assert(b);
2559
sewardj7aa38a92011-02-27 23:04:12 +00002560 ctsa = &a->ts[0]; useda = a->usedTS;
2561 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002562
2563 if (LIKELY(useda == usedb)) {
2564 ScalarTS *tmpa = NULL, *tmpb = NULL;
2565 stats__vts__cmp_structural_slow++;
2566 /* Same length vectors. Find the first difference, if any, as
2567 fast as possible. */
2568 for (i = 0; i < useda; i++) {
2569 tmpa = &ctsa[i];
2570 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002571 if (LIKELY(tmpa->tym == tmpb->tym
2572 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002573 continue;
2574 else
2575 break;
2576 }
2577 if (UNLIKELY(i == useda)) {
2578 /* They're identical. */
2579 return 0;
2580 } else {
2581 tl_assert(i >= 0 && i < useda);
2582 if (tmpa->tym < tmpb->tym) return -1;
2583 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002584 if (tmpa->thrid < tmpb->thrid) return -1;
2585 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002586 /* we just established them as non-identical, hence: */
2587 }
2588 /*NOTREACHED*/
2589 tl_assert(0);
2590 }
sewardjf98e1c02008-10-25 16:22:41 +00002591
2592 if (useda < usedb) return -1;
2593 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002594 /*NOTREACHED*/
2595 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002596}
2597
2598
florianb28fe892014-10-28 20:52:07 +00002599/* Debugging only. Display the given VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002600*/
florianb28fe892014-10-28 20:52:07 +00002601static void VTS__show ( const VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002602{
sewardjf98e1c02008-10-25 16:22:41 +00002603 Word i, n;
florian4367abe2015-02-28 09:22:09 +00002604 tl_assert(vts);
florianb28fe892014-10-28 20:52:07 +00002605
2606 VG_(printf)("[");
sewardj7aa38a92011-02-27 23:04:12 +00002607 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002608 for (i = 0; i < n; i++) {
florianb28fe892014-10-28 20:52:07 +00002609 const ScalarTS *st = &vts->ts[i];
2610 VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002611 }
florianb28fe892014-10-28 20:52:07 +00002612 VG_(printf)("]");
sewardjf98e1c02008-10-25 16:22:41 +00002613}
2614
2615
2616/* Debugging only. Return vts[index], so to speak.
2617*/
sewardj7aa38a92011-02-27 23:04:12 +00002618ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2619{
sewardjf98e1c02008-10-25 16:22:41 +00002620 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002621 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002622 stats__vts__indexat_slow++;
florian4367abe2015-02-28 09:22:09 +00002623 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002624 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002625 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002626 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002627 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002628 return st->tym;
2629 }
2630 return 0;
2631}
2632
2633
sewardjffce8152011-06-24 10:09:41 +00002634/* See comment on prototype above.
2635*/
2636static void VTS__declare_thread_very_dead ( Thr* thr )
2637{
2638 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2639
2640 tl_assert(thr->llexit_done);
2641 tl_assert(thr->joinedwith_done);
2642
2643 ThrID nyu;
2644 nyu = Thr__to_ThrID(thr);
philippec3508652015-03-28 12:01:58 +00002645 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
sewardjffce8152011-06-24 10:09:41 +00002646
2647 /* We can only get here if we're assured that we'll never again
2648 need to look at this thread's ::viR or ::viW. Set them to
2649 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2650 mostly so that we don't wind up pruning them (as that would be
2651 nonsensical: the only interesting ScalarTS entry for a dead
2652 thread is its own index, and the pruning will remove that.). */
2653 VtsID__rcdec(thr->viR);
2654 VtsID__rcdec(thr->viW);
2655 thr->viR = VtsID_INVALID;
2656 thr->viW = VtsID_INVALID;
2657}
2658
2659
sewardjf98e1c02008-10-25 16:22:41 +00002660/////////////////////////////////////////////////////////////////
2661/////////////////////////////////////////////////////////////////
2662// //
2663// SECTION END vts primitives //
2664// //
2665/////////////////////////////////////////////////////////////////
2666/////////////////////////////////////////////////////////////////
2667
2668
2669
2670/////////////////////////////////////////////////////////////////
2671/////////////////////////////////////////////////////////////////
2672// //
2673// SECTION BEGIN main library //
2674// //
2675/////////////////////////////////////////////////////////////////
2676/////////////////////////////////////////////////////////////////
2677
2678
2679/////////////////////////////////////////////////////////
2680// //
2681// VTS set //
2682// //
2683/////////////////////////////////////////////////////////
2684
sewardjffce8152011-06-24 10:09:41 +00002685static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002686
2687static void vts_set_init ( void )
2688{
2689 tl_assert(!vts_set);
2690 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2691 HG_(free),
2692 (Word(*)(UWord,UWord))VTS__cmp_structural );
sewardjf98e1c02008-10-25 16:22:41 +00002693}
2694
sewardj7aa38a92011-02-27 23:04:12 +00002695/* Given a VTS, look in vts_set to see if we already have a
2696 structurally identical one. If yes, return the pair (True, pointer
2697 to the existing one). If no, clone this one, add the clone to the
2698 set, and return (False, pointer to the clone). */
2699static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002700{
2701 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002702 stats__vts_set__focaa++;
2703 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002704 /* lookup cand (by value) */
2705 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2706 /* found it */
2707 tl_assert(valW == 0);
2708 /* if this fails, cand (by ref) was already present (!) */
2709 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002710 *res = (VTS*)keyW;
2711 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002712 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002713 /* not present. Clone, add and return address of clone. */
2714 stats__vts_set__focaa_a++;
2715 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2716 tl_assert(clone != cand);
2717 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2718 *res = clone;
2719 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002720 }
2721}
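
/* This is the classic hash-consing (interning) pattern: vts_set holds
   exactly one canonical copy of each distinct VTS, so equality of
   interned VTSs is pointer equality.  A sketch of the calling idiom
   ('example_intern' is ours): */
#if 0
static VTS* example_intern ( VTS* scratch /* caller-built, id INVALID */ )
{
   VTS* canonical = NULL;
   Bool already
      = vts_set__find__or__clone_and_add( &canonical, scratch );
   /* 'scratch' may now be reused or freed; 'canonical' is the copy
      owned by vts_set, pre-existing iff 'already' is True. */
   if (0) VG_(printf)("present already? %d\n", (Int)already);
   return canonical;
}
#endif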
2722
2723
2724/////////////////////////////////////////////////////////
2725// //
2726// VTS table //
2727// //
2728/////////////////////////////////////////////////////////
2729
2730static void VtsID__invalidate_caches ( void ); /* fwds */
2731
2732/* A type to hold VTS table entries. Invariants:
2733 If .vts == NULL, then this entry is not in use, so:
2734 - .rc == 0
2735 - this entry is on the freelist (unfortunately, does not imply
philippea1ac2f42015-05-01 17:12:00 +00002736 any constraints on value for u.freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002737 If .vts != NULL, then this entry is in use:
2738 - .vts is findable in vts_set
2739 - .vts->id == this entry number
2740 - no specific value for .rc (even 0 is OK)
philippea1ac2f42015-05-01 17:12:00 +00002741 - this entry is not on freelist, so u.freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002742*/
2743typedef
2744 struct {
2745 VTS* vts; /* vts, in vts_set */
2746 UWord rc; /* reference count - enough for entire aspace */
philippea1ac2f42015-05-01 17:12:00 +00002747 union {
2748 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2749 VtsID remap; /* used only during pruning, for used entries */
2750 } u;
2751 /* u.freelink only used when vts == NULL,
2752 u.remap only used when vts != NULL, during pruning. */
sewardjf98e1c02008-10-25 16:22:41 +00002753 }
2754 VtsTE;
2755
2756/* The VTS table. */
2757static XArray* /* of VtsTE */ vts_tab = NULL;
2758
2759/* An index into the VTS table, indicating the start of the list of
2760 free (available for use) entries. If the list is empty, this is
2761 VtsID_INVALID. */
2762static VtsID vts_tab_freelist = VtsID_INVALID;
2763
2764/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2765 vts_tab equals or exceeds this size. After GC, the value here is
2766 set appropriately so as to check for the next GC point. */
2767static Word vts_next_GC_at = 1000;
2768
2769static void vts_tab_init ( void )
2770{
florian91ed8cc2014-09-15 18:50:17 +00002771 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2772 HG_(free), sizeof(VtsTE) );
2773 vts_tab_freelist = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002774}
2775
2776/* Add ii to the free list, checking that it looks out-of-use. */
2777static void add_to_free_list ( VtsID ii )
2778{
2779 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2780 tl_assert(ie->vts == NULL);
2781 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002782 tl_assert(ie->u.freelink == VtsID_INVALID);
2783 ie->u.freelink = vts_tab_freelist;
sewardjf98e1c02008-10-25 16:22:41 +00002784 vts_tab_freelist = ii;
2785}
2786
2787/* Get an entry from the free list. This will return VtsID_INVALID if
2788 the free list is empty. */
2789static VtsID get_from_free_list ( void )
2790{
2791 VtsID ii;
2792 VtsTE* ie;
2793 if (vts_tab_freelist == VtsID_INVALID)
2794 return VtsID_INVALID;
2795 ii = vts_tab_freelist;
2796 ie = VG_(indexXA)( vts_tab, ii );
2797 tl_assert(ie->vts == NULL);
2798 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002799 vts_tab_freelist = ie->u.freelink;
sewardjf98e1c02008-10-25 16:22:41 +00002800 return ii;
2801}
2802
2803/* Produce a new VtsID that can be used, either by getting it from
2804 the freelist, or, if that is empty, by expanding vts_tab. */
2805static VtsID get_new_VtsID ( void )
2806{
2807 VtsID ii;
2808 VtsTE te;
2809 ii = get_from_free_list();
2810 if (ii != VtsID_INVALID)
2811 return ii;
2812 te.vts = NULL;
2813 te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002814 te.u.freelink = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002815 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2816 return ii;
2817}
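
/* Free entries are threaded through the table itself via u.freelink,
   so the free list needs no storage of its own and behaves as a LIFO
   stack.  A lifecycle sketch ('example_recycle' is ours), for an
   entry whose refcount has dropped to zero: */
#if 0
static void example_recycle ( VtsID ii )
{
   VtsTE* te = VG_(indexXA)( vts_tab, ii );
   tl_assert(te->rc == 0);
   te->vts = NULL; /* mark out-of-use */
   te->u.freelink = VtsID_INVALID;
   add_to_free_list( ii ); /* push onto vts_tab_freelist */
   tl_assert( get_from_free_list() == ii ); /* LIFO: popped first */
}
#endif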
2818
2819
2820/* Indirect callback from lib_zsm. */
2821static void VtsID__rcinc ( VtsID ii )
2822{
2823 VtsTE* ie;
2824 /* VG_(indexXA) does a range check for us */
2825 ie = VG_(indexXA)( vts_tab, ii );
2826 tl_assert(ie->vts); /* else it's not in use */
2827 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2828 tl_assert(ie->vts->id == ii);
2829 ie->rc++;
2830}
2831
2832/* Indirect callback from lib_zsm. */
2833static void VtsID__rcdec ( VtsID ii )
2834{
2835 VtsTE* ie;
2836 /* VG_(indexXA) does a range check for us */
2837 ie = VG_(indexXA)( vts_tab, ii );
2838 tl_assert(ie->vts); /* else it's not in use */
2839 tl_assert(ie->rc > 0); /* else RC snafu */
2840 tl_assert(ie->vts->id == ii);
2841 ie->rc--;
2842}
2843
2844
sewardj7aa38a92011-02-27 23:04:12 +00002845/* Look up 'cand' in our collection of VTSs. If present, return the
2846 VtsID for the pre-existing version. If not present, clone it, add
2847 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2848 it, and return that. */
2849static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002850{
sewardj7aa38a92011-02-27 23:04:12 +00002851 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002852 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002853 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2854 tl_assert(in_tab);
2855 if (already_have) {
2856 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002857 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002858 tl_assert(in_tab->id != VtsID_INVALID);
2859 ie = VG_(indexXA)( vts_tab, in_tab->id );
2860 tl_assert(ie->vts == in_tab);
2861 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002862 } else {
2863 VtsID ii = get_new_VtsID();
2864 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002865 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002866 ie->rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002867 ie->u.freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002868 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002869 return ii;
2870 }
2871}
2872
2873
florian6bd9dc12012-11-23 16:17:43 +00002874static void show_vts_stats ( const HChar* caller )
sewardjf98e1c02008-10-25 16:22:41 +00002875{
2876 UWord nSet, nTab, nLive;
2877 ULong totrc;
2878 UWord n, i;
2879 nSet = VG_(sizeFM)( vts_set );
2880 nTab = VG_(sizeXA)( vts_tab );
2881 totrc = 0;
2882 nLive = 0;
2883 n = VG_(sizeXA)( vts_tab );
2884 for (i = 0; i < n; i++) {
2885 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2886 if (ie->vts) {
2887 nLive++;
2888 totrc += (ULong)ie->rc;
2889 } else {
2890 tl_assert(ie->rc == 0);
2891 }
2892 }
2893 VG_(printf)(" show_vts_stats %s\n", caller);
2894 VG_(printf)(" vts_tab size %4lu\n", nTab);
2895 VG_(printf)(" vts_tab live %4lu\n", nLive);
2896 VG_(printf)(" vts_set size %4lu\n", nSet);
2897 VG_(printf)(" total rc %4llu\n", totrc);
2898}
2899
sewardjffce8152011-06-24 10:09:41 +00002900
2901/* --- Helpers for VtsID pruning --- */
2902
2903static
2904void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2905 /*MOD*/XArray* /* of VtsTE */ new_tab,
2906 VtsID* ii )
2907{
2908 VtsTE *old_te, *new_te;
2909 VtsID old_id, new_id;
2910 /* We're relying here on VG_(indexXA)'s range checking to assert on
2911 any stupid values, in particular *ii == VtsID_INVALID. */
2912 old_id = *ii;
2913 old_te = VG_(indexXA)( old_tab, old_id );
2914 old_te->rc--;
philippea1ac2f42015-05-01 17:12:00 +00002915 new_id = old_te->u.remap;
sewardjffce8152011-06-24 10:09:41 +00002916 new_te = VG_(indexXA)( new_tab, new_id );
2917 new_te->rc++;
2918 *ii = new_id;
2919}
2920
2921static
2922void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2923 /*MOD*/XArray* /* of VtsTE */ new_tab,
2924 SVal* s )
2925{
2926 SVal old_sv, new_sv;
2927 old_sv = *s;
2928 if (SVal__isC(old_sv)) {
2929 VtsID rMin, wMin;
2930 rMin = SVal__unC_Rmin(old_sv);
2931 wMin = SVal__unC_Wmin(old_sv);
2932 remap_VtsID( old_tab, new_tab, &rMin );
2933 remap_VtsID( old_tab, new_tab, &wMin );
2934 new_sv = SVal__mkC( rMin, wMin );
2935 *s = new_sv;
2936 }
2937}
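
/* Remapping an SVal reduces to remapping its two halves because, for
   constraint ("C") SVals, the Rmin/Wmin VtsID fields are the only
   VtsIDs the value carries. A guarded-out sketch of the packing this
   relies on (SVal__mkC and friends are defined later in this file);
   the concrete id values are arbitrary. */
#if 0
static void example_sval_packing ( void )
{
   VtsID rMin = 3, wMin = 7;             /* arbitrary example ids */
   SVal  sv   = SVal__mkC( rMin, wMin ); /* rMin in the high half, wMin in the low */
   tl_assert( SVal__unC_Rmin(sv) == rMin );
   tl_assert( SVal__unC_Wmin(sv) == wMin );
}
#endif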
2938
2939
sewardjf98e1c02008-10-25 16:22:41 +00002940/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002941__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002942static void vts_tab__do_GC ( Bool show_stats )
2943{
2944 UWord i, nTab, nLive, nFreed;
2945
sewardjffce8152011-06-24 10:09:41 +00002946 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002947 /* check this is actually necessary. */
2948 tl_assert(vts_tab_freelist == VtsID_INVALID);
2949
2950 /* empty the caches for partial order checks and binary joins. We
2951 could do better and prune out the entries to be deleted, but it
2952 ain't worth the hassle. */
2953 VtsID__invalidate_caches();
2954
2955 /* First, make the reference counts up to date. */
2956 zsm_flush_cache();
2957
2958 nTab = VG_(sizeXA)( vts_tab );
2959
2960 if (show_stats) {
2961 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2962 show_vts_stats("before GC");
2963 }
2964
sewardjffce8152011-06-24 10:09:41 +00002965 /* Now we can inspect the entire vts_tab. Any entries with zero
2966 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002967 free list, removed from vts_set, and deleted. */
2968 nFreed = 0;
2969 for (i = 0; i < nTab; i++) {
2970 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002971 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002972 VtsTE* te = VG_(indexXA)( vts_tab, i );
2973 if (te->vts == NULL) {
2974 tl_assert(te->rc == 0);
2975 continue; /* already on the free list (presumably) */
2976 }
2977 if (te->rc > 0)
2978 continue; /* in use */
2979 /* Ok, we got one we can free. */
2980 tl_assert(te->vts->id == i);
2981 /* first, remove it from vts_set. */
2982 present = VG_(delFromFM)( vts_set,
2983 &oldK, &oldV, (UWord)te->vts );
2984 tl_assert(present); /* else it isn't in vts_set ?! */
2985 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2986 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2987 /* now free the VTS itself */
2988 VTS__delete(te->vts);
2989 te->vts = NULL;
2990 /* and finally put this entry on the free list */
philippea1ac2f42015-05-01 17:12:00 +00002991 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
sewardjf98e1c02008-10-25 16:22:41 +00002992 add_to_free_list( i );
2993 nFreed++;
2994 }
2995
2996 /* Now figure out when the next GC should be. We'll allow the
2997 number of VTSs to double before GCing again. Except of course
2998 that since we can't (or, at least, don't) shrink vts_tab, we
 2999 can't set the threshold value smaller than it. */
3000 tl_assert(nFreed <= nTab);
3001 nLive = nTab - nFreed;
 3002 tl_assert(nLive <= nTab); /* nLive is unsigned, so >= 0 holds trivially */
3003 vts_next_GC_at = 2 * nLive;
3004 if (vts_next_GC_at < nTab)
3005 vts_next_GC_at = nTab;
3006
3007 if (show_stats) {
3008 show_vts_stats("after GC");
3009 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3010 }
3011
philippe2bd23262015-05-11 20:56:49 +00003012 stats__vts_tab_GC++;
sewardj5e2ac3b2009-08-11 10:39:25 +00003013 if (VG_(clo_stats)) {
sewardjf98e1c02008-10-25 16:22:41 +00003014 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00003015 VG_(message)(Vg_DebugMsg,
philippef54cb662015-05-10 22:19:31 +00003016 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3017 stats__vts_tab_GC,
3018 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00003019 }
sewardjffce8152011-06-24 10:09:41 +00003020 /* ---------- END VTS GC ---------- */
3021
3022 /* Decide whether to do VTS pruning. We have one of three
3023 settings. */
3024 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3025
3026 Bool do_pruning = False;
3027 switch (HG_(clo_vts_pruning)) {
3028 case 0: /* never */
3029 break;
3030 case 1: /* auto */
3031 do_pruning = (++pruning_auto_ctr % 5) == 0;
3032 break;
3033 case 2: /* always */
3034 do_pruning = True;
3035 break;
3036 default:
3037 tl_assert(0);
3038 }
3039
3040 /* The rest of this routine only handles pruning, so we can
3041 quit at this point if it is not to be done. */
3042 if (!do_pruning)
3043 return;
philippec3508652015-03-28 12:01:58 +00003044 /* No need to do pruning if no thread died since the last pruning, as
3045 no VtsTE can be pruned. */
3046 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3047 return;
sewardjffce8152011-06-24 10:09:41 +00003048
3049 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00003050 /* Sort and check the very dead threads that died since the last pruning.
3051 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00003052 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00003053 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003054
3055 /* We will run through the old table, and create a new table and
philippea1ac2f42015-05-01 17:12:00 +00003056 set, at the same time setting the u.remap entries in the old
sewardjffce8152011-06-24 10:09:41 +00003057 table to point to the new entries. Then, visit every VtsID in
3058 the system, and replace all of them with new ones, using the
philippea1ac2f42015-05-01 17:12:00 +00003059 u.remap entries in the old table. Finally, we can delete the old
sewardjffce8152011-06-24 10:09:41 +00003060 table and set. */
3061
3062 XArray* /* of VtsTE */ new_tab
3063 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3064 HG_(free), sizeof(VtsTE) );
3065
3066 /* WordFM VTS* void */
3067 WordFM* new_set
3068 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3069 HG_(free),
3070 (Word(*)(UWord,UWord))VTS__cmp_structural );
3071
3072 /* Visit each old VTS. For each one:
3073
3074 * make a pruned version
3075
3076 * search new_set for the pruned version, yielding either
3077 Nothing (not present) or the new VtsID for it.
3078
3079 * if not present, allocate a new VtsID for it, insert (pruned
3080 VTS, new VtsID) in the tree, and set
3081 remap_table[old VtsID] = new VtsID.
3082
3083 * if present, set remap_table[old VtsID] = new VtsID, where
3084 new VtsID was determined by the tree lookup. Then free up
3085 the clone.
3086 */
3087
3088 UWord nBeforePruning = 0, nAfterPruning = 0;
3089 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3090 VtsID new_VtsID_ctr = 0;
3091
3092 for (i = 0; i < nTab; i++) {
3093
3094 /* For each old VTS .. */
3095 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3096 VTS* old_vts = old_te->vts;
sewardjffce8152011-06-24 10:09:41 +00003097
3098 /* Skip it if not in use */
3099 if (old_te->rc == 0) {
3100 tl_assert(old_vts == NULL);
3101 continue;
3102 }
philippea1ac2f42015-05-01 17:12:00 +00003103 tl_assert(old_te->u.remap == VtsID_INVALID);
sewardjffce8152011-06-24 10:09:41 +00003104 tl_assert(old_vts != NULL);
3105 tl_assert(old_vts->id == i);
3106 tl_assert(old_vts->ts != NULL);
3107
3108 /* It is in use. Make a pruned version. */
3109 nBeforePruning++;
3110 nSTSsBefore += old_vts->usedTS;
3111 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00003112 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003113 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3114 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3115 == 0x0ddC0ffeeBadF00dULL);
3116
3117 /* Get rid of the old VTS and the tree entry. It's a bit more
3118 complex to incrementally delete the VTSs now than to nuke
3119 them all after we're done, but the upside is that we don't
3120 wind up temporarily storing potentially two complete copies
3121 of each VTS and hence spiking memory use. */
3122 UWord oldK = 0, oldV = 12345;
3123 Bool present = VG_(delFromFM)( vts_set,
3124 &oldK, &oldV, (UWord)old_vts );
3125 tl_assert(present); /* else it isn't in vts_set ?! */
3126 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3127 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3128 /* now free the VTS itself */
3129 VTS__delete(old_vts);
3130 old_te->vts = NULL;
3131 old_vts = NULL;
3132
3133 /* NO MENTIONS of old_vts allowed beyond this point. */
3134
3135 /* Ok, we have the pruned copy in new_vts. See if a
3136 structurally identical version is already present in new_set.
3137 If so, delete the one we just made and move on; if not, add
3138 it. */
3139 VTS* identical_version = NULL;
3140 UWord valW = 12345;
3141 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3142 (UWord)new_vts)) {
3143 // already have it
3144 tl_assert(valW == 0);
3145 tl_assert(identical_version != NULL);
3146 tl_assert(identical_version != new_vts);
3147 VTS__delete(new_vts);
3148 new_vts = identical_version;
3149 tl_assert(new_vts->id != VtsID_INVALID);
3150 } else {
3151 tl_assert(valW == 12345);
3152 tl_assert(identical_version == NULL);
3153 new_vts->id = new_VtsID_ctr++;
3154 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3155 tl_assert(!b);
3156 VtsTE new_te;
3157 new_te.vts = new_vts;
3158 new_te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00003159 new_te.u.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003160 Word j = VG_(addToXA)( new_tab, &new_te );
3161 tl_assert(j <= i);
3162 tl_assert(j == new_VtsID_ctr - 1);
3163 // stats
3164 nAfterPruning++;
3165 nSTSsAfter += new_vts->usedTS;
3166 }
philippea1ac2f42015-05-01 17:12:00 +00003167 old_te->u.remap = new_vts->id;
sewardjffce8152011-06-24 10:09:41 +00003168
3169 } /* for (i = 0; i < nTab; i++) */
3170
philippec3508652015-03-28 12:01:58 +00003171 /* Move very dead threads from verydead_thread_table_not_pruned to
3172 verydead_thread_table. Sort and check verydead_thread_table
3173 to verify a thread was reported very dead only once. */
3174 {
3175 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3176
3177 for (i = 0; i < nBT; i++) {
3178 ThrID thrid =
3179 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3180 VG_(addToXA)( verydead_thread_table, &thrid );
3181 }
3182 verydead_thread_table_sort_and_check (verydead_thread_table);
3183 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3184 }
3185
sewardjffce8152011-06-24 10:09:41 +00003186 /* At this point, we have:
philippea1ac2f42015-05-01 17:12:00 +00003187 * the old VTS table, with its u.remap entries set,
sewardjffce8152011-06-24 10:09:41 +00003188 and with all .vts == NULL.
3189 * the old VTS tree should be empty, since it and the old VTSs
 3190 it contained have been incrementally deleted as we worked
3191 through the old table.
philippea1ac2f42015-05-01 17:12:00 +00003192 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
sewardjffce8152011-06-24 10:09:41 +00003193 == VtsID_INVALID.
3194 * the new VTS tree.
3195 */
3196 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3197
3198 /* Now actually apply the mapping. */
3199 /* Visit all the VtsIDs in the entire system. Where do we expect
3200 to find them?
3201 (a) in shadow memory -- the LineZs and LineFs
3202 (b) in our collection of struct _Thrs.
3203 (c) in our collection of struct _SOs.
3204 Nowhere else, AFAICS. Not in the zsm cache, because that just
3205 got invalidated.
3206
philippea1ac2f42015-05-01 17:12:00 +00003207 Using the u.remap fields in vts_tab, map each old VtsID to a new
sewardjffce8152011-06-24 10:09:41 +00003208 VtsID. For each old VtsID, dec its rc; and for each new one,
3209 inc it. This sets up the new refcounts, and it also gives a
3210 cheap sanity check of the old ones: all old refcounts should be
3211 zero after this operation.
3212 */
3213
3214 /* Do the mappings for (a) above: iterate over the Primary shadow
3215 mem map (WordFM Addr SecMap*). */
3216 UWord secmapW = 0;
3217 VG_(initIterFM)( map_shmem );
3218 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3219 UWord j;
3220 SecMap* sm = (SecMap*)secmapW;
3221 tl_assert(sm->magic == SecMap_MAGIC);
3222 /* Deal with the LineZs */
3223 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3224 LineZ* lineZ = &sm->linesZ[i];
philippe71ed3c92015-05-17 19:32:42 +00003225 if (lineZ->dict[0] != SVal_INVALID) {
3226 for (j = 0; j < 4; j++)
3227 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3228 } else {
3229 LineF* lineF = SVal2Ptr (lineZ->dict[1]);
3230 for (j = 0; j < N_LINE_ARANGE; j++)
3231 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3232 }
sewardjffce8152011-06-24 10:09:41 +00003233 }
3234 }
3235 VG_(doneIterFM)( map_shmem );
3236
3237 /* Do the mappings for (b) above: visit our collection of struct
3238 _Thrs. */
3239 Thread* hgthread = get_admin_threads();
3240 tl_assert(hgthread);
3241 while (hgthread) {
3242 Thr* hbthr = hgthread->hbthr;
3243 tl_assert(hbthr);
3244 /* Threads that are listed in the prunable set have their viR
3245 and viW set to VtsID_INVALID, so we can't mess with them. */
3246 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3247 tl_assert(hbthr->viR == VtsID_INVALID);
3248 tl_assert(hbthr->viW == VtsID_INVALID);
3249 hgthread = hgthread->admin;
3250 continue;
3251 }
3252 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3253 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3254 hgthread = hgthread->admin;
3255 }
3256
3257 /* Do the mappings for (c) above: visit the struct _SOs. */
3258 SO* so = admin_SO;
3259 while (so) {
3260 if (so->viR != VtsID_INVALID)
3261 remap_VtsID( vts_tab, new_tab, &so->viR );
3262 if (so->viW != VtsID_INVALID)
3263 remap_VtsID( vts_tab, new_tab, &so->viW );
3264 so = so->admin_next;
3265 }
3266
3267 /* So, we're nearly done (with this incredibly complex operation).
3268 Check the refcounts for the old VtsIDs all fell to zero, as
3269 expected. Any failure is serious. */
3270 for (i = 0; i < nTab; i++) {
3271 VtsTE* te = VG_(indexXA)( vts_tab, i );
3272 tl_assert(te->vts == NULL);
3273 /* This is the assert proper. Note we're also asserting
philippea1ac2f42015-05-01 17:12:00 +00003274 zeroness for old entries which are unmapped. That's OK. */
sewardjffce8152011-06-24 10:09:41 +00003275 tl_assert(te->rc == 0);
3276 }
3277
3278 /* Install the new table and set. */
3279 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3280 vts_set = new_set;
3281 VG_(deleteXA)( vts_tab );
3282 vts_tab = new_tab;
3283
3284 /* The freelist of vts_tab entries is empty now, because we've
3285 compacted all of the live entries at the low end of the
3286 table. */
3287 vts_tab_freelist = VtsID_INVALID;
3288
3289 /* Sanity check vts_set and vts_tab. */
3290
3291 /* Because all the live entries got slid down to the bottom of vts_tab: */
3292 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3293
3294 /* Assert that the vts_tab and vts_set entries point at each other
3295 in the required way */
3296 UWord wordK = 0, wordV = 0;
3297 VG_(initIterFM)( vts_set );
3298 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3299 tl_assert(wordK != 0);
3300 tl_assert(wordV == 0);
3301 VTS* vts = (VTS*)wordK;
3302 tl_assert(vts->id != VtsID_INVALID);
3303 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3304 tl_assert(te->vts == vts);
3305 }
3306 VG_(doneIterFM)( vts_set );
3307
3308 /* Also iterate over the table, and check each entry is
3309 plausible. */
3310 nTab = VG_(sizeXA)( vts_tab );
3311 for (i = 0; i < nTab; i++) {
3312 VtsTE* te = VG_(indexXA)( vts_tab, i );
3313 tl_assert(te->vts);
3314 tl_assert(te->vts->id == i);
3315 tl_assert(te->rc > 0); /* 'cos we just GC'd */
philippea1ac2f42015-05-01 17:12:00 +00003316 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3317 /* value of te->u.remap not relevant */
sewardjffce8152011-06-24 10:09:41 +00003318 }
3319
3320 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
philippe2bd23262015-05-11 20:56:49 +00003321 stats__vts_pruning++;
sewardjffce8152011-06-24 10:09:41 +00003322 if (VG_(clo_stats)) {
sewardjffce8152011-06-24 10:09:41 +00003323 tl_assert(nTab > 0);
3324 VG_(message)(
3325 Vg_DebugMsg,
philippe2bd23262015-05-11 20:56:49 +00003326 "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
sewardjffce8152011-06-24 10:09:41 +00003327 "after %lu (avg sz %lu)\n",
philippe2bd23262015-05-11 20:56:49 +00003328 stats__vts_pruning,
sewardjffce8152011-06-24 10:09:41 +00003329 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3330 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3331 );
3332 }
sewardjffce8152011-06-24 10:09:41 +00003333 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003334}
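
/* The GC schedule computed above amounts to: let the live population
   double before collecting again, but never aim below the current
   table size, since vts_tab is never shrunk. A guarded-out
   restatement of that policy in isolation. */
#if 0
static UWord example_next_gc_threshold ( UWord nTab, UWord nLive )
{
   UWord next = 2 * nLive;
   if (next < nTab)
      next = nTab;   /* the table can't shrink, so don't aim lower */
   return next;
}
#endif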
3335
3336
3337/////////////////////////////////////////////////////////
3338// //
3339// Vts IDs //
3340// //
3341/////////////////////////////////////////////////////////
3342
3343//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003344/* A scratch, max-sized VTS used as the destination (the first
 3345 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3346static VTS* temp_max_sized_VTS = NULL;
3347
3348//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003349static ULong stats__cmpLEQ_queries = 0;
3350static ULong stats__cmpLEQ_misses = 0;
3351static ULong stats__join2_queries = 0;
3352static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003353
3354static inline UInt ROL32 ( UInt w, Int n ) {
3355 w = (w << n) | (w >> (32-n));
3356 return w;
3357}
3358static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3359 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3360 return hash % nTab;
3361}
3362
sewardj23f12002009-07-24 08:45:08 +00003363#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003364static
sewardj23f12002009-07-24 08:45:08 +00003365 struct { VtsID vi1; VtsID vi2; Bool leq; }
3366 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003367
3368#define N_JOIN2_CACHE 1023
3369static
3370 struct { VtsID vi1; VtsID vi2; VtsID res; }
3371 join2_cache[N_JOIN2_CACHE];
3372
3373static void VtsID__invalidate_caches ( void ) {
3374 Int i;
sewardj23f12002009-07-24 08:45:08 +00003375 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3376 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3377 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3378 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003379 }
3380 for (i = 0; i < N_JOIN2_CACHE; i++) {
3381 join2_cache[i].vi1 = VtsID_INVALID;
3382 join2_cache[i].vi2 = VtsID_INVALID;
3383 join2_cache[i].res = VtsID_INVALID;
3384 }
3385}
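
/* Both caches above are direct-mapped memo tables: a query hashes
   (vi1,vi2) to a single slot, and each slot remembers at most one
   previously computed answer. Invalidation just stamps every slot
   with VtsID_INVALID, which no real query can match. A guarded-out
   sketch of the lookup/compute/fill protocol that the _WRK functions
   below implement. */
#if 0
static Bool example_cached_leq ( VtsID vi1, VtsID vi2 )
{
   UInt h = hash_VtsIDs( vi1, vi2, N_CMPLEQ_CACHE );
   if (cmpLEQ_cache[h].vi1 == vi1 && cmpLEQ_cache[h].vi2 == vi2)
      return cmpLEQ_cache[h].leq;                      /* hit */
   Bool leq = VTS__cmpLEQ( VtsID__to_VTS(vi1),
                           VtsID__to_VTS(vi2) ) == 0;  /* miss: compute */
   cmpLEQ_cache[h].vi1 = vi1;           /* fill, evicting old entry */
   cmpLEQ_cache[h].vi2 = vi2;
   cmpLEQ_cache[h].leq = leq;
   return leq;
}
#endif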
3386//////////////////////////
3387
sewardjd52392d2008-11-08 20:36:26 +00003388//static Bool VtsID__is_valid ( VtsID vi ) {
3389// VtsTE* ve;
3390// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3391// return False;
3392// ve = VG_(indexXA)( vts_tab, vi );
3393// if (!ve->vts)
3394// return False;
3395// tl_assert(ve->vts->id == vi);
3396// return True;
3397//}
sewardjf98e1c02008-10-25 16:22:41 +00003398
3399static VTS* VtsID__to_VTS ( VtsID vi ) {
3400 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3401 tl_assert(te->vts);
3402 return te->vts;
3403}
3404
3405static void VtsID__pp ( VtsID vi ) {
sewardjf98e1c02008-10-25 16:22:41 +00003406 VTS* vts = VtsID__to_VTS(vi);
florianb28fe892014-10-28 20:52:07 +00003407 VTS__show( vts );
sewardjf98e1c02008-10-25 16:22:41 +00003408}
3409
3410/* compute partial ordering relation of vi1 and vi2. */
3411__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003412static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003413 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003414 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003415 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003416 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003417 tl_assert(vi1 != vi2);
3418 ////++
sewardj23f12002009-07-24 08:45:08 +00003419 stats__cmpLEQ_queries++;
3420 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3421 if (cmpLEQ_cache[hash].vi1 == vi1
3422 && cmpLEQ_cache[hash].vi2 == vi2)
3423 return cmpLEQ_cache[hash].leq;
3424 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003425 ////--
3426 v1 = VtsID__to_VTS(vi1);
3427 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003428 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003429 ////++
sewardj23f12002009-07-24 08:45:08 +00003430 cmpLEQ_cache[hash].vi1 = vi1;
3431 cmpLEQ_cache[hash].vi2 = vi2;
3432 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003433 ////--
sewardj23f12002009-07-24 08:45:08 +00003434 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003435}
sewardj23f12002009-07-24 08:45:08 +00003436static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3437 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003438}
3439
3440/* compute binary join */
3441__attribute__((noinline))
3442static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3443 UInt hash;
3444 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003445 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003446 //if (vi1 == vi2) return vi1;
3447 tl_assert(vi1 != vi2);
3448 ////++
3449 stats__join2_queries++;
3450 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3451 if (join2_cache[hash].vi1 == vi1
3452 && join2_cache[hash].vi2 == vi2)
3453 return join2_cache[hash].res;
3454 stats__join2_misses++;
3455 ////--
3456 vts1 = VtsID__to_VTS(vi1);
3457 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003458 temp_max_sized_VTS->usedTS = 0;
3459 VTS__join(temp_max_sized_VTS, vts1,vts2);
3460 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003461 ////++
3462 join2_cache[hash].vi1 = vi1;
3463 join2_cache[hash].vi2 = vi2;
3464 join2_cache[hash].res = res;
3465 ////--
3466 return res;
3467}
3468static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003469 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003470}
3471
3472/* create a singleton VTS, namely [thr:1] */
3473static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003474 temp_max_sized_VTS->usedTS = 0;
3475 VTS__singleton(temp_max_sized_VTS, thr,tym);
3476 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003477}
3478
3479/* tick operation, creates value 1 if specified index is absent */
3480static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3481 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003482 temp_max_sized_VTS->usedTS = 0;
3483 VTS__tick(temp_max_sized_VTS, idx,vts);
3484 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003485}
3486
3487/* index into a VTS (only for assertions) */
3488static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3489 VTS* vts = VtsID__to_VTS(vi);
3490 return VTS__indexAt_SLOW( vts, idx );
3491}
3492
sewardj23f12002009-07-24 08:45:08 +00003493/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3494 any, really) element in vi1 which is pointwise greater-than the
3495 corresponding element in vi2. If no such element exists, return
3496 NULL. This needs to be fairly quick since it is called every time
3497 a race is detected. */
3498static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3499{
3500 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003501 Thr* diffthr;
3502 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003503 tl_assert(vi1 != vi2);
3504 vts1 = VtsID__to_VTS(vi1);
3505 vts2 = VtsID__to_VTS(vi2);
3506 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003507 diffthrid = VTS__cmpLEQ(vts1, vts2);
3508 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003509 tl_assert(diffthr); /* else they are LEQ ! */
3510 return diffthr;
3511}
3512
3513
3514/////////////////////////////////////////////////////////
3515// //
3516// Filters //
3517// //
3518/////////////////////////////////////////////////////////
3519
sewardj23f12002009-07-24 08:45:08 +00003520/* Forget everything we know -- clear the filter and let everything
3521 through. This needs to be as fast as possible, since it is called
3522 every time the running thread changes, and every time a thread's
3523 vector clocks change, which can be quite frequent. The obvious
3524 fast way to do this is simply to stuff in tags which we know are
3525 not going to match anything, since they're not aligned to the start
3526 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003527static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003528{
3529 UWord i;
3530 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3531 for (i = 0; i < FI_NUM_LINES; i += 8) {
3532 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3533 fi->tags[i+1] = 1;
3534 fi->tags[i+2] = 1;
3535 fi->tags[i+3] = 1;
3536 fi->tags[i+4] = 1;
3537 fi->tags[i+5] = 1;
3538 fi->tags[i+6] = 1;
3539 fi->tags[i+7] = 1;
3540 }
3541 tl_assert(i == FI_NUM_LINES);
3542}
3543
3544/* Clearing an arbitrary range in the filter. Unfortunately
3545 we have to do this due to core-supplied new/die-mem events. */
3546
3547static void Filter__clear_1byte ( Filter* fi, Addr a )
3548{
3549 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3550 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3551 FiLine* line = &fi->lines[lineno];
3552 UWord loff = (a - atag) / 8;
3553 UShort mask = 0x3 << (2 * (a & 7));
3554 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3555 if (LIKELY( fi->tags[lineno] == atag )) {
3556 /* hit. clear the bits. */
3557 UShort u16 = line->u16s[loff];
3558 line->u16s[loff] = u16 & ~mask; /* clear them */
3559 } else {
3560 /* miss. The filter doesn't hold this address, so ignore. */
3561 }
3562}
3563
3564static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3565{
3566 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3567 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3568 FiLine* line = &fi->lines[lineno];
3569 UWord loff = (a - atag) / 8;
3570 if (LIKELY( fi->tags[lineno] == atag )) {
3571 line->u16s[loff] = 0;
3572 } else {
3573 /* miss. The filter doesn't hold this address, so ignore. */
3574 }
3575}
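
/* Filter geometry, restated: each of the FI_NUM_LINES lines shadows
   FI_LINE_SZB (32) bytes of address space with two state bits (R and
   W) per byte, i.e. four UShorts of 8 x 2 bits each. FI_GET_TAG
   strips the in-line offset from an address and FI_GET_LINENO picks
   the line. A guarded-out sketch of locating the 2-bit field for a
   single byte; the mask matches the one in Filter__clear_1byte. */
#if 0
static UShort example_byte_mask ( Filter* fi, Addr a,
                                  FiLine** linep, UWord* loffp )
{
   Addr  atag   = FI_GET_TAG(a);     /* line-aligned base of 'a' */
   UWord lineno = FI_GET_LINENO(a);
   *linep = &fi->lines[lineno];
   *loffp = (a - atag) / 8;          /* which UShort within the line */
   return 0x3 << (2 * (a & 7));      /* the R and W bits for this byte */
}
#endif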
3576
philippefc00a2a2015-05-15 11:41:54 +00003577/* Only used to verify the fast Filter__clear_range */
3578__attribute__((unused))
3579static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
sewardj23f12002009-07-24 08:45:08 +00003580{
philippefc00a2a2015-05-15 11:41:54 +00003581 tl_assert (CHECK_ZSM);
3582
sewardj23f12002009-07-24 08:45:08 +00003583 /* slowly do part preceding 8-alignment */
3584 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3585 Filter__clear_1byte( fi, a );
3586 a++;
3587 len--;
3588 }
3589 /* vector loop */
3590 while (len >= 8) {
3591 Filter__clear_8bytes_aligned( fi, a );
3592 a += 8;
3593 len -= 8;
3594 }
3595 /* slowly do tail */
3596 while (UNLIKELY(len > 0)) {
3597 Filter__clear_1byte( fi, a );
3598 a++;
3599 len--;
3600 }
3601}
3602
philippefc00a2a2015-05-15 11:41:54 +00003603static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3604{
3605# if CHECK_ZSM > 0
3606 /* We check the below more complex algorithm with the simple one.
3607 This check is very expensive : we do first the slow way on a
3608 copy of the data, then do it the fast way. On RETURN, we check
3609 the two values are equal. */
3610 Filter fi_check = *fi;
3611 Filter__clear_range_SLOW(&fi_check, a, len);
3612# define RETURN goto check_and_return
3613# else
3614# define RETURN return
3615# endif
3616
3617 Addr begtag = FI_GET_TAG(a); /* tag of range begin */
3618
3619 Addr end = a + len - 1;
3620 Addr endtag = FI_GET_TAG(end); /* tag of range end. */
3621
3622 UWord rlen = len; /* remaining length to clear */
3623
3624 Addr c = a; /* Current position we are clearing. */
3625 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3626 FiLine* cline; /* Current line we are clearing */
3627 UWord cloff; /* Current offset in line we are clearing, when clearing
3628 partial lines. */
3629
3630 UShort u16;
3631
3632 STATIC_ASSERT (FI_LINE_SZB == 32);
3633 // Below assumes filter lines are 32 bytes
3634
3635 if (LIKELY(fi->tags[clineno] == begtag)) {
3636 /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3637 /* First filter line matches begtag.
3638 If c is not at the filter line begin, the below will clear
3639 the filter line bytes starting from c. */
3640 cline = &fi->lines[clineno];
3641 cloff = (c - begtag) / 8;
3642
3643 /* First the byte(s) needed to reach 8-alignment */
3644 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3645 /* hiB is the nr of bytes (higher addresses) from c to reach
 3646 8-alignment. */
3647 UWord hiB = 8 - (c & 7);
3648 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
3649 mask is C000 , F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3650 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
3651 UShort mask = 0xFFFF << (16 - 2*hiB);
3652
3653 u16 = cline->u16s[cloff];
3654 if (LIKELY(rlen >= hiB)) {
3655 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3656 rlen -= hiB;
3657 c += hiB;
3658 cloff += 1;
3659 } else {
 3660 /* Only have the bits for rlen bytes. */
3661 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3662 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
 3663 RETURN; // We have cleared all we can.
3664 }
3665 }
3666 /* c is now 8 aligned. Clear by 8 aligned bytes,
3667 till c is filter-line aligned */
3668 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3669 cline->u16s[cloff] = 0;
3670 c += 8;
3671 rlen -= 8;
3672 cloff += 1;
3673 }
3674 } else {
3675 c = begtag + FI_LINE_SZB;
3676 if (c > end)
 3677 RETURN; // We have cleared all we can.
3678 rlen -= c - a;
3679 }
3680 // We have changed c, so re-establish clineno.
3681 clineno = FI_GET_LINENO(c);
3682
3683 if (rlen >= FI_LINE_SZB) {
 3684 /* Here, c is filter line-aligned. Clear all the full lines that
 3685 overlap with the range starting at c. */
3686 UWord nfull = rlen / FI_LINE_SZB;
3687 UWord full_len = nfull * FI_LINE_SZB;
3688 rlen -= full_len;
3689 if (nfull > FI_NUM_LINES)
 3690 nfull = FI_NUM_LINES; // no need to check the same entry several times.
3691
3692 for (UWord n = 0; n < nfull; n++) {
3693 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3694 cline = &fi->lines[clineno];
3695 cline->u16s[0] = 0;
3696 cline->u16s[1] = 0;
3697 cline->u16s[2] = 0;
3698 cline->u16s[3] = 0;
3699 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3700 }
3701 clineno++;
3702 if (UNLIKELY(clineno == FI_NUM_LINES))
3703 clineno = 0;
3704 }
3705
3706 c += full_len;
3707 clineno = FI_GET_LINENO(c);
3708 }
3709
3710 if (CHECK_ZSM) {
3711 tl_assert(VG_IS_8_ALIGNED(c));
3712 tl_assert(clineno == FI_GET_LINENO(c));
3713 }
3714
3715 /* Do the last filter line, if it was not cleared as a full filter line */
3716 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3717 cline = &fi->lines[clineno];
3718 cloff = (c - endtag) / 8;
3719 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3720
3721 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3722 8 bytes. */
3723 while (rlen >= 8) {
3724 cline->u16s[cloff] = 0;
3725 c += 8;
3726 rlen -= 8;
3727 cloff += 1;
3728 }
3729 /* Then the remaining byte(s) */
3730 if (rlen > 0) {
3731 /* nr of bytes from c to reach end. */
3732 UWord loB = rlen;
3733 /* Compute mask representing loB bytes [c..c+loB[ :
3734 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3735 UShort mask = 0xFFFF >> (16 - 2*loB);
3736
3737 u16 = cline->u16s[cloff];
3738 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3739 }
3740 }
3741
3742# if CHECK_ZSM > 0
3743 check_and_return:
3744 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3745# endif
3746# undef RETURN
3747}
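
/* When CHECK_ZSM is set, Filter__clear_range validates its fast path
   differentially: run the trusted byte-at-a-time algorithm on a copy
   of the state, then the fast algorithm on the real state, and
   require the two results to be bit-identical. A guarded-out,
   distilled form of that pattern (in the code above the check is
   folded into Filter__clear_range itself via the RETURN macro). */
#if 0
static void example_differential_check ( Filter* fi, Addr a, UWord len )
{
   Filter reference = *fi;                         /* copy the state */
   Filter__clear_range_SLOW( &reference, a, len ); /* trusted oracle */
   Filter__clear_range( fi, a, len );              /* fast algorithm */
   tl_assert( VG_(memcmp)( &reference, fi, sizeof reference ) == 0 );
}
#endif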
sewardj23f12002009-07-24 08:45:08 +00003748
3749/* ------ Read handlers for the filter. ------ */
3750
3751static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3752{
3753 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3754 return False;
3755 {
3756 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3757 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3758 FiLine* line = &fi->lines[lineno];
3759 UWord loff = (a - atag) / 8;
3760 UShort mask = 0xAAAA;
3761 if (LIKELY( fi->tags[lineno] == atag )) {
3762 /* hit. check line and update. */
3763 UShort u16 = line->u16s[loff];
3764 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3765 line->u16s[loff] = u16 | mask; /* set them */
3766 return ok;
3767 } else {
3768 /* miss. nuke existing line and re-use it. */
3769 UWord i;
3770 fi->tags[lineno] = atag;
3771 for (i = 0; i < FI_LINE_SZB / 8; i++)
3772 line->u16s[i] = 0;
3773 line->u16s[loff] = mask;
3774 return False;
3775 }
3776 }
3777}
3778
3779static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3780{
3781 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3782 return False;
3783 {
3784 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3785 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3786 FiLine* line = &fi->lines[lineno];
3787 UWord loff = (a - atag) / 8;
3788 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3789 if (LIKELY( fi->tags[lineno] == atag )) {
3790 /* hit. check line and update. */
3791 UShort u16 = line->u16s[loff];
3792 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3793 line->u16s[loff] = u16 | mask; /* set them */
3794 return ok;
3795 } else {
3796 /* miss. nuke existing line and re-use it. */
3797 UWord i;
3798 fi->tags[lineno] = atag;
3799 for (i = 0; i < FI_LINE_SZB / 8; i++)
3800 line->u16s[i] = 0;
3801 line->u16s[loff] = mask;
3802 return False;
3803 }
3804 }
3805}
3806
3807static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3808{
3809 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3810 return False;
3811 {
3812 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3813 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3814 FiLine* line = &fi->lines[lineno];
3815 UWord loff = (a - atag) / 8;
3816 UShort mask = 0xA << (2 * (a & 6));
3817 /* mask is A000, 0A00, 00A0 or 000A */
3818 if (LIKELY( fi->tags[lineno] == atag )) {
3819 /* hit. check line and update. */
3820 UShort u16 = line->u16s[loff];
3821 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3822 line->u16s[loff] = u16 | mask; /* set them */
3823 return ok;
3824 } else {
3825 /* miss. nuke existing line and re-use it. */
3826 UWord i;
3827 fi->tags[lineno] = atag;
3828 for (i = 0; i < FI_LINE_SZB / 8; i++)
3829 line->u16s[i] = 0;
3830 line->u16s[loff] = mask;
3831 return False;
3832 }
3833 }
3834}
3835
3836static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3837{
3838 {
3839 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3840 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3841 FiLine* line = &fi->lines[lineno];
3842 UWord loff = (a - atag) / 8;
3843 UShort mask = 0x2 << (2 * (a & 7));
3844 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3845 if (LIKELY( fi->tags[lineno] == atag )) {
3846 /* hit. check line and update. */
3847 UShort u16 = line->u16s[loff];
3848 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3849 line->u16s[loff] = u16 | mask; /* set them */
3850 return ok;
3851 } else {
3852 /* miss. nuke existing line and re-use it. */
3853 UWord i;
3854 fi->tags[lineno] = atag;
3855 for (i = 0; i < FI_LINE_SZB / 8; i++)
3856 line->u16s[i] = 0;
3857 line->u16s[loff] = mask;
3858 return False;
3859 }
3860 }
3861}
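
/* All four read handlers answer the same question: "has this exact
   (address, size) read already been seen, with no intervening
   clear?"  They simultaneously record the read, so a False result
   also primes the filter for the next occurrence. A guarded-out
   sketch of the intended call pattern, assuming the Thr.filter field
   set up in Thr__new later in this file. */
#if 0
static void example_read8 ( Thr* thr, Addr a )
{
   if (Filter__ok_to_skip_crd08( thr->filter, a ))
      return;   /* redundant read: the MSM would learn nothing new */
   /* ... otherwise fall through to the full state-machine handler ... */
}
#endif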
3862
3863
3864/* ------ Write handlers for the filter. ------ */
3865
3866static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3867{
3868 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3869 return False;
3870 {
3871 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3872 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3873 FiLine* line = &fi->lines[lineno];
3874 UWord loff = (a - atag) / 8;
3875 UShort mask = 0xFFFF;
3876 if (LIKELY( fi->tags[lineno] == atag )) {
3877 /* hit. check line and update. */
3878 UShort u16 = line->u16s[loff];
3879 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3880 line->u16s[loff] = u16 | mask; /* set them */
3881 return ok;
3882 } else {
3883 /* miss. nuke existing line and re-use it. */
3884 UWord i;
3885 fi->tags[lineno] = atag;
3886 for (i = 0; i < FI_LINE_SZB / 8; i++)
3887 line->u16s[i] = 0;
3888 line->u16s[loff] = mask;
3889 return False;
3890 }
3891 }
3892}
3893
3894static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3895{
3896 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3897 return False;
3898 {
3899 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3900 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3901 FiLine* line = &fi->lines[lineno];
3902 UWord loff = (a - atag) / 8;
3903 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3904 if (LIKELY( fi->tags[lineno] == atag )) {
3905 /* hit. check line and update. */
3906 UShort u16 = line->u16s[loff];
3907 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3908 line->u16s[loff] = u16 | mask; /* set them */
3909 return ok;
3910 } else {
3911 /* miss. nuke existing line and re-use it. */
3912 UWord i;
3913 fi->tags[lineno] = atag;
3914 for (i = 0; i < FI_LINE_SZB / 8; i++)
3915 line->u16s[i] = 0;
3916 line->u16s[loff] = mask;
3917 return False;
3918 }
3919 }
3920}
3921
3922static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3923{
3924 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3925 return False;
3926 {
3927 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3928 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3929 FiLine* line = &fi->lines[lineno];
3930 UWord loff = (a - atag) / 8;
3931 UShort mask = 0xF << (2 * (a & 6));
3932 /* mask is F000, 0F00, 00F0 or 000F */
3933 if (LIKELY( fi->tags[lineno] == atag )) {
3934 /* hit. check line and update. */
3935 UShort u16 = line->u16s[loff];
3936 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3937 line->u16s[loff] = u16 | mask; /* set them */
3938 return ok;
3939 } else {
3940 /* miss. nuke existing line and re-use it. */
3941 UWord i;
3942 fi->tags[lineno] = atag;
3943 for (i = 0; i < FI_LINE_SZB / 8; i++)
3944 line->u16s[i] = 0;
3945 line->u16s[loff] = mask;
3946 return False;
3947 }
3948 }
3949}
3950
3951static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3952{
3953 {
3954 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3955 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3956 FiLine* line = &fi->lines[lineno];
3957 UWord loff = (a - atag) / 8;
3958 UShort mask = 0x3 << (2 * (a & 7));
3959 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3960 if (LIKELY( fi->tags[lineno] == atag )) {
3961 /* hit. check line and update. */
3962 UShort u16 = line->u16s[loff];
 3963 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3964 line->u16s[loff] = u16 | mask; /* set them */
3965 return ok;
3966 } else {
3967 /* miss. nuke existing line and re-use it. */
3968 UWord i;
3969 fi->tags[lineno] = atag;
3970 for (i = 0; i < FI_LINE_SZB / 8; i++)
3971 line->u16s[i] = 0;
3972 line->u16s[loff] = mask;
3973 return False;
3974 }
3975 }
3976}
3977
sewardjf98e1c02008-10-25 16:22:41 +00003978
3979/////////////////////////////////////////////////////////
3980// //
3981// Threads //
3982// //
3983/////////////////////////////////////////////////////////
3984
sewardje4cce742011-02-24 15:25:24 +00003985/* Maps ThrID values to their Thr*s (which contain ThrID values that
 3986 should point back to the relevant slot in the array). Lowest
3987 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3988static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3989
3990/* And a counter to dole out ThrID values. For rationale/background,
3991 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003992static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003993
3994static ThrID Thr__to_ThrID ( Thr* thr ) {
3995 return thr->thrid;
3996}
3997static Thr* Thr__from_ThrID ( UInt thrid ) {
3998 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3999 tl_assert(thr->thrid == thrid);
4000 return thr;
4001}
4002
4003static Thr* Thr__new ( void )
4004{
sewardjf98e1c02008-10-25 16:22:41 +00004005 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
4006 thr->viR = VtsID_INVALID;
4007 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00004008 thr->llexit_done = False;
4009 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00004010 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
philippeca903bb2014-04-26 22:50:08 +00004011 if (HG_(clo_history_level) == 1)
4012 thr->local_Kws_n_stacks
4013 = VG_(newXA)( HG_(zalloc),
4014 "libhb.Thr__new.3 (local_Kws_and_stacks)",
4015 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00004016
4017 /* Add this Thr* <-> ThrID binding to the mapping, and
4018 cross-check */
4019 if (!thrid_to_thr_map) {
4020 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4021 HG_(free), sizeof(Thr*) );
sewardje4cce742011-02-24 15:25:24 +00004022 }
4023
sewardj7aa38a92011-02-27 23:04:12 +00004024 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00004025 /* We're hosed. We have to stop. */
4026 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4027 }
4028
4029 thr->thrid = thrid_counter++;
4030 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4031 tl_assert(ix + 1024 == thr->thrid);
4032
sewardjf98e1c02008-10-25 16:22:41 +00004033 return thr;
4034}
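
/* ThrIDs are dense: the first thread is numbered 1024 and slot k of
   thrid_to_thr_map holds the Thr* whose thrid is 1024+k, so both
   directions of the mapping are O(1). A guarded-out restatement of
   the invariant that Thr__new and Thr__from_ThrID check piecewise. */
#if 0
static void example_thrid_invariant ( void )
{
   Word n = VG_(sizeXA)( thrid_to_thr_map );
   for (Word k = 0; k < n; k++) {
      Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, k );
      tl_assert( thr->thrid == 1024 + k );
   }
}
#endif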
4035
sewardj8ab2c132009-08-02 09:34:35 +00004036static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00004037{
4038 Word nPresent;
4039 ULong_n_EC pair;
4040 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00004041
4042 // We only collect this info at history level 1 (approx)
4043 if (HG_(clo_history_level) != 1)
4044 return;
4045
sewardj8ab2c132009-08-02 09:34:35 +00004046 /* This is the scalar Kw for thr. */
4047 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00004048 pair.ec = main_get_EC( thr );
4049 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00004050 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004051
4052 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00004053 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00004054
4055 /* Throw away old stacks, if necessary. We can't accumulate stuff
4056 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00004057 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
4058 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
4059 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4060 if (0)
4061 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00004062 thr, pair.ull, pair.ec );
4063 }
4064
4065 if (nPresent > 0) {
4066 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00004067 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
4068 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00004069 }
4070
4071 if (nPresent == 0)
4072 pair.ec = NULL;
4073
sewardj8ab2c132009-08-02 09:34:35 +00004074 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00004075
4076 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00004077 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00004078 thr, pair.ull, pair.ec );
4079 if (0)
4080 VG_(pp_ExeContext)(pair.ec);
4081}
4082
florian6bd9dc12012-11-23 16:17:43 +00004083static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4084 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00004085{
4086 if (pair1->ull < pair2->ull) return -1;
4087 if (pair1->ull > pair2->ull) return 1;
4088 return 0;
4089}
4090
sewardjf98e1c02008-10-25 16:22:41 +00004091
4092/////////////////////////////////////////////////////////
4093// //
4094// Shadow Values //
4095// //
4096/////////////////////////////////////////////////////////
4097
4098// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4099// hb_zsm.h. We have to do everything else here.
4100
4101/* SVal is 64 bit unsigned int.
4102
4103 <---------30---------> <---------30--------->
4104 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00004105 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00004106 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
4107
sewardjf98e1c02008-10-25 16:22:41 +00004108*/
4109#define SVAL_TAGMASK (3ULL << 62)
4110
4111static inline Bool SVal__isC ( SVal s ) {
4112 return (0ULL << 62) == (s & SVAL_TAGMASK);
4113}
4114static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4115 //tl_assert(VtsID__is_valid(rmini));
4116 //tl_assert(VtsID__is_valid(wmini));
4117 return (((ULong)rmini) << 32) | ((ULong)wmini);
4118}
4119static inline VtsID SVal__unC_Rmin ( SVal s ) {
4120 tl_assert(SVal__isC(s));
4121 return (VtsID)(s >> 32);
4122}
4123static inline VtsID SVal__unC_Wmin ( SVal s ) {
4124 tl_assert(SVal__isC(s));
4125 return (VtsID)(s & 0xFFFFFFFFULL);
4126}
4127
sewardj23f12002009-07-24 08:45:08 +00004128static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004129 return (2ULL << 62) == (s & SVAL_TAGMASK);
4130}
sewardj5aa09bf2014-06-20 14:25:53 +00004131__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00004132static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00004133 return 2ULL << 62;
4134}
4135
4136/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004137static inline void SVal__rcinc ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004138 if (SVal__isC(s)) {
4139 VtsID__rcinc( SVal__unC_Rmin(s) );
4140 VtsID__rcinc( SVal__unC_Wmin(s) );
4141 }
4142}
4143
4144/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004145static inline void SVal__rcdec ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004146 if (SVal__isC(s)) {
4147 VtsID__rcdec( SVal__unC_Rmin(s) );
4148 VtsID__rcdec( SVal__unC_Wmin(s) );
4149 }
4150}
4151
philippe71ed3c92015-05-17 19:32:42 +00004152static inline void *SVal2Ptr (SVal s)
4153{
4154 return (void*)(UWord)s;
4155}
4156
4157static inline SVal Ptr2SVal (void* ptr)
4158{
4159 return (SVal)(UWord)ptr;
4160}
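
/* Per the tag scheme above: top bits 00 mean a constraint pair
   C(Rmin,Wmin), 10 means SVal_NOACCESS, and 11 is SVal_INVALID.
   SVal2Ptr/Ptr2SVal reuse the 64-bit representation to carry a
   LineF* through a LineZ dict slot (see the pruning loop earlier).
   A guarded-out sanity sketch; the pointer value is arbitrary. */
#if 0
static void example_sval_tags ( void )
{
   tl_assert(  SVal__isA( SVal__mkA() ) );
   tl_assert( !SVal__isC( SVal__mkA() ) );
   void* p = (void*)0x1000;          /* arbitrary example pointer */
   tl_assert( SVal2Ptr( Ptr2SVal(p) ) == p );
}
#endif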
4161
4162
sewardjf98e1c02008-10-25 16:22:41 +00004163
4164/////////////////////////////////////////////////////////
4165// //
4166// Change-event map2 //
4167// //
4168/////////////////////////////////////////////////////////
4169
sewardjf98e1c02008-10-25 16:22:41 +00004170/* This is in two parts:
4171
sewardj23f12002009-07-24 08:45:08 +00004172 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00004173 traces. When the reference count of a stack trace becomes zero,
4174 it is removed from the set and freed up. The intent is to have
4175 a set of stack traces which can be referred to from (2), but to
4176 only represent each one once. The set is indexed/searched by
4177 ordering on the stack trace vectors.
4178
sewardj849b0ed2008-12-21 10:43:10 +00004179 2. A SparseWA of OldRefs. These store information about each old
4180 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00004181 location for which the information is recorded. For LRU
philippecabdbb52015-04-20 21:33:16 +00004182 purposes, each OldRef in the SparseWA is also on a doubly
 4183 linked list maintaining the order in which the OldRefs were most
4184 recently accessed.
sewardjf98e1c02008-10-25 16:22:41 +00004185
4186 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00004187 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
4188 size) triples to RCECs. This allows us to collect the last
4189 access-traceback by up to N_OLDREF_ACCS different triples for
4190 this location. The accs[] array is a MTF-array. If a binding
4191 falls off the end, that's too bad -- we will lose info about
4192 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00004193
philippecabdbb52015-04-20 21:33:16 +00004194 We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
4195 Then we do exact LRU discarding. For each discarded OldRef we must
sewardjf98e1c02008-10-25 16:22:41 +00004196 of course decrement the reference count on the all RCECs it
4197 refers to, in order that entries from (1) eventually get
4198 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00004199
4200 A major improvement in reliability of this mechanism would be to
4201 have a dynamically sized OldRef.accs[] array, so no entries ever
4202 fall off the end. In investigations (Dec 08) it appears that a
4203 major cause for the non-availability of conflicting-access traces
4204 in race reports is caused by the fixed size of this array. I
4205 suspect for most OldRefs, only a few entries are used, but for a
4206 minority of cases there is an overflow, leading to info lossage.
4207 Investigations also suggest this is very workload and scheduling
4208 sensitive. Therefore a dynamic sizing would be better.
4209
philippe6643e962012-01-17 21:16:30 +00004210 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00004211 for OldRef structures. And that's important for performance. So
4212 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00004213*/
4214
4215
4216static UWord stats__ctxt_rcdec1 = 0;
4217static UWord stats__ctxt_rcdec2 = 0;
4218static UWord stats__ctxt_rcdec3 = 0;
4219static UWord stats__ctxt_rcdec_calls = 0;
4220static UWord stats__ctxt_rcdec_discards = 0;
4221static UWord stats__ctxt_rcdec1_eq = 0;
4222
4223static UWord stats__ctxt_tab_curr = 0;
4224static UWord stats__ctxt_tab_max = 0;
4225
4226static UWord stats__ctxt_tab_qs = 0;
4227static UWord stats__ctxt_tab_cmps = 0;
4228
4229
4230///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00004231//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00004232///
4233
4234#define N_FRAMES 8
4235
4236// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4237#define RCEC_MAGIC 0xab88abb2UL
4238
4239//#define N_RCEC_TAB 98317 /* prime */
4240#define N_RCEC_TAB 196613 /* prime */
4241
4242typedef
4243 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00004244 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004245 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004246 UWord rc;
4247 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004248 UWord frames_hash; /* hash of all the frames */
4249 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004250 }
4251 RCEC;
4252
philippecabdbb52015-04-20 21:33:16 +00004253//////////// BEGIN RCEC pool allocator
4254static PoolAlloc* rcec_pool_allocator;
4255static RCEC* alloc_RCEC ( void ) {
4256 return VG_(allocEltPA) ( rcec_pool_allocator );
4257}
4258
4259static void free_RCEC ( RCEC* rcec ) {
4260 tl_assert(rcec->magic == RCEC_MAGIC);
4261 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4262}
4263//////////// END RCEC pool allocator
4264
sewardjf98e1c02008-10-25 16:22:41 +00004265static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4266
philippecabdbb52015-04-20 21:33:16 +00004267/* Count of allocated RCEC having ref count > 0 */
4268static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004269
4270/* Gives an arbitrary total order on RCEC .frames fields */
4271static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4272 Word i;
4273 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4274 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004275 if (ec1->frames_hash < ec2->frames_hash) return -1;
4276 if (ec1->frames_hash > ec2->frames_hash) return 1;
4277 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004278 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004279 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004280 }
4281 return 0;
4282}
4283
4284
4285/* Dec the ref of this RCEC. */
4286static void ctxt__rcdec ( RCEC* ec )
4287{
4288 stats__ctxt_rcdec_calls++;
4289 tl_assert(ec && ec->magic == RCEC_MAGIC);
4290 tl_assert(ec->rc > 0);
4291 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00004292 if (ec->rc == 0)
4293 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00004294}
4295
4296static void ctxt__rcinc ( RCEC* ec )
4297{
4298 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00004299 if (ec->rc == 0)
4300 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00004301 ec->rc++;
4302}
4303
4304
4305/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
 4306 move it one step closer to the front of the list, so as to make
4307 subsequent searches for it cheaper. */
4308static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4309{
4310 RCEC *ec0, *ec1, *ec2;
4311 if (ec == *headp)
4312 tl_assert(0); /* already at head of list */
4313 tl_assert(ec != NULL);
4314 ec0 = *headp;
4315 ec1 = NULL;
4316 ec2 = NULL;
4317 while (True) {
4318 if (ec0 == NULL || ec0 == ec) break;
4319 ec2 = ec1;
4320 ec1 = ec0;
4321 ec0 = ec0->next;
4322 }
4323 tl_assert(ec0 == ec);
4324 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4325 RCEC* tmp;
4326 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4327 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4328 closer to the start of the list. */
4329 tl_assert(ec2->next == ec1);
4330 tl_assert(ec1->next == ec0);
4331 tmp = ec0->next;
4332 ec2->next = ec0;
4333 ec0->next = ec1;
4334 ec1->next = tmp;
4335 }
4336 else
4337 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4338 /* it's second in the list. */
4339 tl_assert(*headp == ec1);
4340 tl_assert(ec1->next == ec0);
4341 ec1->next = ec0->next;
4342 ec0->next = ec1;
4343 *headp = ec0;
4344 }
4345}
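
/* The contextTab hash chains are self-organising: a hit that is not
   already at the head gets swapped one step towards it, so RCECs
   that are queried often migrate to the front over repeated lookups.
   A guarded-out sketch of the effect, assuming a chain of at least
   three nodes. */
#if 0
static void example_move_to_front_effect ( RCEC** headp )
{
   /* chain before: A -> B -> C; after the call: A -> C -> B */
   RCEC* c = (*headp)->next->next;
   move_RCEC_one_step_forward( headp, c );
   tl_assert( (*headp)->next == c );
}
#endif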
4346
4347
 4348/* Find the given RCEC in the hash table, and return a pointer to it.
 4349 Or, if not present, add the given one to the table (by making a copy of
4350 it, so the caller can immediately deallocate the original) and
4351 return a pointer to the copy. The caller can safely have 'example'
4352 on its stack, since we will always return a pointer to a copy of
4353 it, not to the original. Note that the inserted node will have .rc
 4354 of zero and so the caller must immediately increment it. */
4355__attribute__((noinline))
4356static RCEC* ctxt__find_or_add ( RCEC* example )
4357{
4358 UWord hent;
4359 RCEC* copy;
4360 tl_assert(example && example->magic == RCEC_MAGIC);
4361 tl_assert(example->rc == 0);
4362
4363 /* Search the hash table to see if we already have it. */
4364 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004365 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004366 copy = contextTab[hent];
4367 while (1) {
4368 if (!copy) break;
4369 tl_assert(copy->magic == RCEC_MAGIC);
4370 stats__ctxt_tab_cmps++;
4371 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4372 copy = copy->next;
4373 }
4374
4375 if (copy) {
4376 tl_assert(copy != example);
4377 /* optimisation: if it's not at the head of its list, move 1
4378 step fwds, to make future searches cheaper */
4379 if (copy != contextTab[hent]) {
4380 move_RCEC_one_step_forward( &contextTab[hent], copy );
4381 }
4382 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004383 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004384 tl_assert(copy != example);
4385 *copy = *example;
4386 copy->next = contextTab[hent];
4387 contextTab[hent] = copy;
4388 stats__ctxt_tab_curr++;
4389 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4390 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4391 }
4392 return copy;
4393}
4394
4395static inline UWord ROLW ( UWord w, Int n )
4396{
4397 Int bpw = 8 * sizeof(UWord);
4398 w = (w << n) | (w >> (bpw-n));
4399 return w;
4400}
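
/* Note (added): ROLW is a left-rotate.  It assumes 0 < n < bpw; with
   n == 0 the expression 'w >> (bpw-n)' would shift by the full word
   width, which is undefined behaviour in C.  The only caller below
   uses n == 19, which is fine for both 32- and 64-bit words. */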

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc = 0;
   example.rcX = 0;
   example.next = NULL;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}
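
/* Sketch (added, not part of the build): the intended calling
   discipline for the interning machinery above.  get_RCEC returns a
   pointer into contextTab whose .rc may still be zero, so a caller
   that stores the pointer must immediately take a reference, and must
   drop it again when the stored pointer is overwritten or discarded.
   event_map_bind below follows exactly this pattern; the function
   name here is hypothetical. */
#if 0
static void example_remember_context ( Thr* thr, RCEC** slot )
{
   RCEC* rcec = get_RCEC( thr ); /* interned; .rc may still be 0 */
   ctxt__rcinc( rcec );          /* take our reference */
   if (*slot)
      ctxt__rcdec( *slot );      /* drop reference to the old value */
   *slot = rcec;
}
#endif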

///////////////////////////////////////////////////////
//// Part (2):
///  A SparseWA guest-addr -> OldRef, that refers to (1)
///

/* Records an access: a thread, a context (size & writeness) and the
   number of held locks.  The size (1,2,4,8) is encoded as 00 = 1, 01 =
   2, 10 = 4, 11 = 8.
*/
typedef
   struct {
      RCEC*     rcec;
      WordSetID locksHeldW;
      UInt      thrid  : SCALARTS_N_THRBITS;
      UInt      szLg2B : 2;
      UInt      isW    : 1;
   }
   Thr_n_RCEC;
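
/* Sketch (added, not part of the build): the 2-bit size encoding used
   by .szLg2B, written out as a hypothetical encode/decode pair.  It
   simply stores log2 of the access size; event_map_bind encodes with
   a switch and libhb_event_map_lookup decodes with '1 << szLg2B'. */
#if 0
static inline UInt example_encode_szLg2B ( SizeT szB )
{
   switch (szB) {
      case 1: return 0; /* 00 */
      case 2: return 1; /* 01 */
      case 4: return 2; /* 10 */
      case 8: return 3; /* 11 */
      default: tl_assert(0);
   }
}
static inline SizeT example_decode_szLg2B ( UInt szLg2B )
{
   return (SizeT)1 << szLg2B; /* 0,1,2,3 -> 1,2,4,8 */
}
#endif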

#define N_OLDREF_ACCS 5

typedef
   struct OldRef {
      struct OldRef *prev; // to refs older than this one
      struct OldRef *next; // to refs newer than this one
      Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
      /* unused slots in this array have .thrid == 0, which is invalid */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;
/* We need ga in OldRef in order to remove OldRef from the sparsewa
   by key (i.e. ga) when re-using the lru OldRef. */

//////////// BEGIN OldRef pool allocator
static PoolAlloc* oldref_pool_allocator;
// Note: we only allocate elements in this pool allocator, we never free them.
// We stop allocating elements at HG_(clo_conflict_cache_size).
//////////// END OldRef pool allocator

static OldRef mru;
static OldRef lru;
// A doubly linked list, chaining all OldRefs in mru/lru order.
// mru/lru are sentinel nodes.
// Whenever an oldref is re-used, it is moved to the most recently used
// position (i.e. pointed to by mru.prev).
// When a new oldref is needed, it is allocated from the pool
// if we have not yet reached --conflict-cache-size.
// Otherwise, if all oldrefs have already been allocated,
// the least recently used one (i.e. pointed to by lru.next) is re-used.
// When an OldRef is used, it is moved to the most recently used position
// (i.e. pointed to by mru.prev).

// Removes r from the doubly linked list.
// Note: we do not need to test for special cases such as
// NULL next or prev pointers, because we have sentinel nodes
// at both sides of the list. So, a node is always forward and
// backward linked.
static inline void OldRef_unchain(OldRef *r)
{
   r->next->prev = r->prev;
   r->prev->next = r->next;
}

// Insert new as the newest OldRef.
// Similarly to OldRef_unchain, no need to test for NULL
// pointers, as e.g. mru.prev is always guaranteed to point
// to a non NULL node (lru when the list is empty).
static inline void OldRef_newest(OldRef *new)
{
   new->next = &mru;
   new->prev = mru.prev;
   mru.prev = new;
   new->prev->next = new;
}
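
/* Sketch (added, not part of the build): the list discipline the two
   helpers above implement.  Marking a node most-recently-used is
   always "unchain, then re-insert at the mru end"; eviction always
   takes lru.next.  A hypothetical touch operation would be: */
#if 0
static void example_touch_OldRef ( OldRef* r )
{
   OldRef_unchain(r); /* detach from wherever it currently sits */
   OldRef_newest(r);  /* re-attach just before the mru sentinel */
}
#endif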

static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
/* Note: the number of refs in the oldrefTree will always be equal to
   the number of elements that were allocated from the OldRef pool
   allocator, as we never free an OldRef: we just re-use them. */


/* Allocates a new OldRef, or re-uses the lru one if all allowed
   OldRefs have already been allocated. */
static OldRef* alloc_or_reuse_OldRef ( void )
{
   if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
      oldrefTreeN++;
      return VG_(allocEltPA) ( oldref_pool_allocator );
   } else {
      Bool  b;
      UWord valW;
      OldRef *oldref = lru.next;

      OldRef_unchain(oldref);
      b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
      tl_assert(b);
      tl_assert (oldref == (OldRef*)valW);

      for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aRef) {
            tl_assert(aThrID != 0);
            stats__ctxt_rcdec3++;
            ctxt__rcdec( aRef );
         } else {
            tl_assert(aThrID == 0);
         }
      }
      return oldref;
   }
}


inline static UInt min_UInt ( UInt a, UInt b ) {
   return a < b ? a : b;
}

/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
/* Copied from exp-ptrcheck/sg_main.c */
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
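
/* Worked example (added), with 4-byte intervals:
   cmp_nonempty_intervals(4,4, 8,4) == -1  ([4,8) lies wholly below [8,12)),
   cmp_nonempty_intervals(8,4, 4,4) ==  1,
   cmp_nonempty_intervals(4,4, 6,4) ==  0  ([4,8) and [6,10) overlap).
   Note this is not a total order: 0 means "overlaps", not "equal". */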

static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
{
   OldRef* ref;
   RCEC*   rcec;
   Word    i, j;
   UWord   valW;
   Bool    b;

   tl_assert(thr);
   ThrID thrid = thr->thrid;
   tl_assert(thrid != 0); /* zero is used to denote an empty slot. */

   WordSetID locksHeldW = thr->hgthread->locksetW;

   rcec = get_RCEC( thr );
   ctxt__rcinc(rcec);

   UInt szLg2B = 0;
   switch (szB) {
      /* This doesn't look particularly branch-predictor friendly. */
      case 1:  szLg2B = 0; break;
      case 2:  szLg2B = 1; break;
      case 4:  szLg2B = 2; break;
      case 8:  szLg2B = 3; break;
      default: tl_assert(0);
   }

   /* Look in the map to see if we already have a record for this
      address. */
   b = VG_(lookupSWA)( oldrefTree, &valW, a );

   if (b) {

      /* We already have a record for this address.  We now need to
         see if we have a stack trace pertaining to this (thrid, R/W,
         size) triple. */
      ref = (OldRef*)valW;

      tl_assert (ref->ga == a);

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thrid != thrid)
            continue;
         if (ref->accs[i].szLg2B != szLg2B)
            continue;
         if (ref->accs[i].isW != (UInt)(isW & 1))
            continue;
         /* else we have a match, so stop looking. */
         break;
      }

      if (i < N_OLDREF_ACCS) {
         /* thread 'thr' has an entry at index 'i'.  Update its RCEC. */
         if (i > 0) {
            Thr_n_RCEC tmp = ref->accs[i-1];
            ref->accs[i-1] = ref->accs[i];
            ref->accs[i] = tmp;
            i--;
         }
         if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
         stats__ctxt_rcdec1++;
         ctxt__rcdec( ref->accs[i].rcec );
         tl_assert(ref->accs[i].thrid == thrid);
         /* Update the RCEC and the W-held lockset. */
         ref->accs[i].rcec       = rcec;
         ref->accs[i].locksHeldW = locksHeldW;
      } else {
         /* No entry for this (thread, R/W, size, nWHeld) quad.
            Shuffle all of them down one slot, and put the new entry
            at the start of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
            if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
               VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
            ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
         }
         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
            ref->accs[j] = ref->accs[j-1];
         ref->accs[0].thrid      = thrid;
         ref->accs[0].szLg2B     = szLg2B;
         ref->accs[0].isW        = (UInt)(isW & 1);
         ref->accs[0].locksHeldW = locksHeldW;
         ref->accs[0].rcec       = rcec;
         /* thrid==0 is used to signify an empty slot, so we can't
            add zero thrid (such a ThrID is invalid anyway). */
         /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
      }

      OldRef_unchain(ref);
      OldRef_newest(ref);

   } else {

      /* We don't have a record for this address.  Create a new one. */
      ref = alloc_or_reuse_OldRef();
      ref->ga = a;
      ref->accs[0].thrid      = thrid;
      ref->accs[0].szLg2B     = szLg2B;
      ref->accs[0].isW        = (UInt)(isW & 1);
      ref->accs[0].locksHeldW = locksHeldW;
      ref->accs[0].rcec       = rcec;

      /* thrid==0 is used to signify an empty slot, so we can't
         add zero thrid (such a ThrID is invalid anyway). */
      /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */

      /* Clear out the rest of the entries */
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].rcec       = NULL;
         ref->accs[j].thrid      = 0;
         ref->accs[j].szLg2B     = 0;
         ref->accs[j].isW        = 0;
         ref->accs[j].locksHeldW = 0;
      }
      VG_(addToSWA)( oldrefTree, a, (UWord)ref );
      OldRef_newest (ref);
   }
}


/* Extract info from the conflicting-access machinery. */
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
                              /*OUT*/Thr**        resThr,
                              /*OUT*/SizeT*       resSzB,
                              /*OUT*/Bool*        resIsW,
                              /*OUT*/WordSetID*   locksHeldW,
                              Thr* thr, Addr a, SizeT szB, Bool isW )
{
   Word    i, j;
   OldRef* ref;
   UWord   valW;
   Bool    b;

   ThrID     cand_thrid;
   RCEC*     cand_rcec;
   Bool      cand_isW;
   SizeT     cand_szB;
   WordSetID cand_locksHeldW;
   Addr      cand_a;

   Addr toCheck[15];
   Int  nToCheck = 0;

   tl_assert(thr);
   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);

   ThrID thrid = thr->thrid;

   toCheck[nToCheck++] = a;
   for (i = -7; i < (Word)szB; i++) {
      if (i != 0)
         toCheck[nToCheck++] = a + i;
   }
   tl_assert(nToCheck <= 15);
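   /* Note (added): nToCheck == szB + 7, i.e. the candidate set is
      a-7 .. a+szB-1 inclusive.  Any stored access (at most 8 bytes)
      overlapping [a, a+szB) must start in that window, and since
      szB <= 8 the count is at most 15 -- hence the array size. */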

   /* Now see if we can find a suitable matching event for
      any of the addresses in toCheck[0 .. nToCheck-1]. */
   for (j = 0; j < nToCheck; j++) {

      cand_a = toCheck[j];
      // VG_(printf)("test %ld %p\n", j, cand_a);

      b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
      if (!b)
         continue;

      ref = (OldRef*)valW;
      tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */

      cand_thrid      = 0; /* invalid; see comments in event_map_bind */
      cand_rcec       = NULL;
      cand_isW        = False;
      cand_szB        = 0;
      cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         Thr_n_RCEC* cand = &ref->accs[i];
         cand_rcec       = cand->rcec;
         cand_thrid      = cand->thrid;
         cand_isW        = (Bool)cand->isW;
         cand_szB        = 1 << cand->szLg2B;
         cand_locksHeldW = cand->locksHeldW;

         if (cand_thrid == 0)
            /* This slot isn't in use.  Ignore it. */
            continue;

         if (cand_thrid == thrid)
            /* This is an access by the same thread, but we're only
               interested in accesses from other threads.  Ignore. */
            continue;

         if ((!cand_isW) && (!isW))
            /* We don't want to report a read racing against another
               read; that's stupid.  So in this case move on. */
            continue;

         if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
            /* No overlap with the access we're asking about.  Ignore. */
            continue;

         /* We have a match.  Stop searching. */
         break;
      }

      tl_assert(i >= 0 && i <= N_OLDREF_ACCS);

      if (i < N_OLDREF_ACCS) {
         Int n, maxNFrames;
         /* return with success */
         tl_assert(cand_thrid);
         tl_assert(cand_rcec);
         tl_assert(cand_rcec->magic == RCEC_MAGIC);
         tl_assert(cand_szB >= 1);
         /* Count how many non-zero frames we have. */
         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
         for (n = 0; n < maxNFrames; n++) {
            if (0 == cand_rcec->frames[n]) break;
         }
         *resEC      = VG_(make_ExeContext_from_StackTrace)
                          (cand_rcec->frames, n);
         *resThr     = Thr__from_ThrID(cand_thrid);
         *resSzB     = cand_szB;
         *resIsW     = cand_isW;
         *locksHeldW = cand_locksHeldW;
         return True;
      }

      /* consider next address in toCheck[] */
   } /* for (j = 0; j < nToCheck; j++) */

   /* really didn't find anything. */
   return False;
}

static void event_map_init ( void )
{
   Word i;

   /* Context (RCEC) pool allocator */
   rcec_pool_allocator = VG_(newPA) (
                             sizeof(RCEC),
                             1000 /* RCECs per pool */,
                             HG_(zalloc),
                             "libhb.event_map_init.1 (RCEC pools)",
                             HG_(free)
                          );

   /* Context table */
   tl_assert(!contextTab);
   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
                             N_RCEC_TAB * sizeof(RCEC*) );
   for (i = 0; i < N_RCEC_TAB; i++)
      contextTab[i] = NULL;

   /* Oldref pool allocator */
   oldref_pool_allocator = VG_(newPA)(
                               sizeof(OldRef),
                               1000 /* OldRefs per pool */,
                               HG_(zalloc),
                               "libhb.event_map_init.3 (OldRef pools)",
                               HG_(free)
                            );

   /* Oldref tree */
   tl_assert(!oldrefTree);
   oldrefTree = VG_(newSWA)(
                   HG_(zalloc),
                   "libhb.event_map_init.4 (oldref tree)",
                   HG_(free)
                );

   oldrefTreeN = 0;
   mru.prev = &lru;
   mru.next = NULL;
   lru.prev = NULL;
   lru.next = &mru;
   for (i = 0; i < N_OLDREF_ACCS; i++) {
      mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
                                  .locksHeldW = 0,
                                  .thrid = 0,
                                  .szLg2B = 0,
                                  .isW = 0};
      lru.accs[i] = mru.accs[i];
   }
}

static void event_map__check_reference_counts ( void )
{
   RCEC*   rcec;
   OldRef* oldref;
   Word    i;
   UWord   nEnts = 0;
   UWord   keyW, valW;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, inc check ref counts */
   VG_(initIterSWA)( oldrefTree );
   while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
      oldref = (OldRef*)valW;
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aThrID != 0) {
            tl_assert(aRef);
            tl_assert(aRef->magic == RCEC_MAGIC);
            aRef->rcX++;
         } else {
            tl_assert(!aRef);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}

__attribute__((noinline))
static void do_RCEC_GC ( void )
{
   UInt i;

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      VG_(message)(Vg_DebugMsg,
                   "libhb: RCEC GC: #%u %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   ctr++,
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
   }
   tl_assert (stats__ctxt_tab_curr > RCEC_referenced);

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            *pp = p->next;
            free_RCEC(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_rcdec_discards++;
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p  = p->next;
         }
      }
   }

   tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
}

/////////////////////////////////////////////////////////
//                                                     //
//                     Core MSM                        //
//                                                     //
/////////////////////////////////////////////////////////

/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   Nov 08, and again after [...],
   June 09. */

static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;

/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler, it avoids flooding the user with vast amounts of
   mostly useless information, and because the program is wrong if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we show none only if
   none exist).

   ---

   That requires the auxiliary proof that

      (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance.
*/
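
/* Worked instance (added) of the auxiliary proof above, with two
   threads T and U and clocks written as [T:n, U:m].  Suppose T's
   Kw = [T:5, U:2].  Any constraint Cr can carry at most 5 in its
   T slot, since 5 is the most up-to-date value of T's clock known
   anywhere; say Cr = [T:3, U:7].  Then Cr `join` Kw = [T:5, U:7],
   whose T slot is 5 == Kw[T], as claimed. */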

__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
                         "confTym %llu found %d (%lu,%lu)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            if (!confThr->llexit_done)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}

static Bool is_sane_SVal_C ( SVal sv ) {
   Bool leq;
   if (!SVal__isC(sv)) return True;
   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   return leq;
}


/* Compute new state following a read */
static inline SVal msmcread ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcread++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviR  = acc_thr->viR;
      VtsID tviW  = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
      if (LIKELY(leq)) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           rmini, /* Cfailed */
                           tviR,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msmcread_change++;
      }
   }
   return svNew;
}
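
/* Illustrative trace (added) of the read rule as implemented above,
   clocks written as [T:n, U:m]: thread T with viR = viW = [T:4, U:1]
   reads a location whose constraint is (Cr,Cw) = ([T:2,U:1],[T:2,U:1]).
   rmini <= tviR holds (2<=4, 1<=1), so there is no race and the new
   constraint is (rmini, wmini `join` tviW) = ([T:2,U:1], [T:4,U:1]).
   Had the constraint instead been ([T:2,U:3], ...) with U's entry 3
   not yet "heard of" by T (tviR's U slot still 1), rmini <= tviR
   would fail and record_race_info would be called. */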


/* Compute new state following a write */
static inline SVal msmcwrite ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW <= tviR (invar on thread clocks)
         //    wmini <= tviW (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and  join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}
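
/* Illustrative trace (added) of the write rule: with tviW = [T:4, U:1]
   and a constraint whose W-min is wmini = [T:2, U:1], wmini <= tviW
   holds, so the write is race-free and the constraint collapses to
   (tviW, tviW) = ([T:4,U:1], [T:4,U:1]): after a clean write the
   location is wholly described by the writer's W-clock. */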


/////////////////////////////////////////////////////////
//                                                     //
//   Apply core MSM to specific memory locations       //
//                                                     //
/////////////////////////////////////////////////////////

/*------------- ZSM accesses: 8 bit sapply ------------- */

static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}
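
/* Note (added): all the sapply routines below share this shape.  As
   set up earlier in this file, a CacheLine covers a 64-byte-aligned
   chunk as eight 8-byte trees; cl->descrs[tno] records at which
   granularity the SVals in tree 'tno' are currently valid.  If the
   descriptor shows no valid SVal at exactly the requested
   granularity and offset, pulldown_to_8/16/32 first splits a coarser
   value so the addressed slot holds its own SVal, which is then
   pushed through msmcread/msmcwrite.  Misaligned or unsplittable
   cases fall through to two accesses at the next smaller size
   ('slowcase'). */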

static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*------------- ZSM accesses: 16 bit sapply ------------- */

static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}

static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}

/*------------- ZSM accesses: 32 bit sapply ------------- */

static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}

static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord      toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord      toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}
5652
sewardj23f12002009-07-24 08:45:08 +00005653/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005654
5655static
sewardj23f12002009-07-24 08:45:08 +00005656SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005657 CacheLine* cl;
5658 UWord cloff, tno, toff;
5659 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005660 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005661 cl = get_cacheline(a);
5662 cloff = get_cacheline_offset(a);
5663 tno = get_treeno(a);
5664 toff = get_tree_offset(a); /* == 0 .. 7 */
5665 descr = cl->descrs[tno];
5666 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5667 SVal* tree = &cl->svals[tno << 3];
5668 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5669 }
5670 return cl->svals[cloff];
5671}
5672
sewardj23f12002009-07-24 08:45:08 +00005673static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005674 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005675 stats__cline_scopy08s++;
5676 sv = zsm_sread08( src );
5677 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005678}
5679
5680
sewardj23f12002009-07-24 08:45:08 +00005681/* Block-copy states (needed for implementing realloc()). Note this
5682 doesn't change the filtering arrangements. The caller of
5683 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005684
sewardj23f12002009-07-24 08:45:08 +00005685static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005686{
5687 SizeT i;
5688 if (len == 0)
5689 return;
5690
5691 /* assert for non-overlappingness */
5692 tl_assert(src+len <= dst || dst+len <= src);
5693
5694 /* To be simple, just copy byte by byte. But so as not to wreck
5695 performance for later accesses to dst[0 .. len-1], normalise
5696 destination lines as we finish with them, and also normalise the
5697 line containing the first and last address. */
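   /* Note: the 'normalise' flag computed below is advisory only;
      zsm_scopy08 currently ignores it (hence its 'uu_' parameter
      prefix), so it is headroom for a future optimisation rather
      than behaviour the copy relies on. */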
5698 for (i = 0; i < len; i++) {
5699 Bool normalise
5700 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5701 || i == 0 /* first in range */
5702 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005703 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005704 }
5705}
5706
5707
5708/* For setting address ranges to a given value. Has considerable
5709 sophistication so as to avoid generating large numbers of pointless
5710 cache loads/writebacks for large ranges. */
5711
5712/* Do small ranges in-cache, in the obvious way. */
5713static
sewardj23f12002009-07-24 08:45:08 +00005714void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005715{
5716 /* fast track a couple of common cases */
5717 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005718 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005719 return;
5720 }
5721 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005722 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005723 return;
5724 }
5725
5726 /* be completely general (but as efficient as possible) */
5727 if (len == 0) return;
5728
5729 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005730 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005731 a += 1;
5732 len -= 1;
5733 tl_assert(aligned16(a));
5734 }
5735 if (len == 0) return;
5736
5737 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005738 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005739 a += 2;
5740 len -= 2;
5741 tl_assert(aligned32(a));
5742 }
5743 if (len == 0) return;
5744
5745 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005746 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005747 a += 4;
5748 len -= 4;
5749 tl_assert(aligned64(a));
5750 }
5751 if (len == 0) return;
5752
5753 if (len >= 8) {
5754 tl_assert(aligned64(a));
5755 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005756 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005757 a += 8;
5758 len -= 8;
5759 }
5760 tl_assert(aligned64(a));
5761 }
5762 if (len == 0) return;
5763
5764 if (len >= 4)
5765 tl_assert(aligned32(a));
5766 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005767 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005768 a += 4;
5769 len -= 4;
5770 }
5771 if (len == 0) return;
5772
5773 if (len >= 2)
5774 tl_assert(aligned16(a));
5775 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005776 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005777 a += 2;
5778 len -= 2;
5779 }
5780 if (len == 0) return;
5781
5782 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005783 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005784 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005785 len -= 1;
5786 }
5787 tl_assert(len == 0);
5788}
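
/* Worked example of the alignment peeling above: a == 0x1003,
   len == 13.  0x1003 is not 2-aligned, so swrite08 at 0x1003
   (leaving a == 0x1004, len == 12); 0x1004 is 4-aligned but not
   8-aligned, so swrite32 at 0x1004 (a == 0x1008, len == 8); then a
   single swrite64 at 0x1008 finishes the job.  Three shadow
   operations instead of thirteen byte-sized ones. */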
5789
5790
sewardj23f12002009-07-24 08:45:08 +00005791/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005792 for larger ranges, try to operate directly on the out-of-cache
5793 representation, rather than dragging lines into the cache,
5794 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005795 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005796
sewardj23f12002009-07-24 08:45:08 +00005797 Note that this doesn't change the filtering arrangements. The
5798 caller of zsm_sset_range needs to attend to that. */
5799
5800static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005801{
5802 tl_assert(svNew != SVal_INVALID);
5803 stats__cache_make_New_arange += (ULong)len;
5804
5805 if (0 && len > 500)
5806 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5807
5808 if (0) {
5809 static UWord n_New_in_cache = 0;
5810 static UWord n_New_not_in_cache = 0;
5811       /* tag is 'a' with the in-line offset masked out,
5812          ie with the low N_LINE_BITS bits zeroed */
5813 Addr tag = a & ~(N_LINE_ARANGE - 1);
5814 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5815 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5816 n_New_in_cache++;
5817 } else {
5818 n_New_not_in_cache++;
5819 }
5820 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5821 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5822 n_New_in_cache, n_New_not_in_cache );
5823 }
5824
5825 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005826 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005827 } else {
5828 Addr before_start = a;
5829 Addr aligned_start = cacheline_ROUNDUP(a);
5830 Addr after_start = cacheline_ROUNDDN(a + len);
5831 UWord before_len = aligned_start - before_start;
5832 UWord aligned_len = after_start - aligned_start;
5833 UWord after_len = a + len - after_start;
5834 tl_assert(before_start <= aligned_start);
5835 tl_assert(aligned_start <= after_start);
5836 tl_assert(before_len < N_LINE_ARANGE);
5837 tl_assert(after_len < N_LINE_ARANGE);
5838 tl_assert(get_cacheline_offset(aligned_start) == 0);
5839 if (get_cacheline_offset(a) == 0) {
5840 tl_assert(before_len == 0);
5841 tl_assert(a == aligned_start);
5842 }
5843 if (get_cacheline_offset(a+len) == 0) {
5844 tl_assert(after_len == 0);
5845 tl_assert(after_start == a+len);
5846 }
5847 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005848 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005849 }
5850 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005851 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005852 }
5853 stats__cache_make_New_inZrep += (ULong)aligned_len;
5854
5855 while (1) {
5856 Addr tag;
5857 UWord wix;
5858 if (aligned_start >= after_start)
5859 break;
5860 tl_assert(get_cacheline_offset(aligned_start) == 0);
5861 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5862 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5863 if (tag == cache_shmem.tags0[wix]) {
5864 UWord i;
5865 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005866 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005867 } else {
5868 UWord i;
5869 Word zix;
5870 SecMap* sm;
5871 LineZ* lineZ;
5872 /* This line is not in the cache. Do not force it in; instead
5873 modify it in-place. */
5874 /* find the Z line to write in and rcdec it or the
5875 associated F line. */
5876 find_Z_for_writing( &sm, &zix, tag );
5877 tl_assert(sm);
5878 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5879 lineZ = &sm->linesZ[zix];
5880 lineZ->dict[0] = svNew;
5881 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5882 for (i = 0; i < N_LINE_ARANGE/4; i++)
5883 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5884 rcinc_LineZ(lineZ);
5885 }
5886 aligned_start += N_LINE_ARANGE;
5887 aligned_len -= N_LINE_ARANGE;
5888 }
5889 tl_assert(aligned_start == after_start);
5890 tl_assert(aligned_len == 0);
5891 }
5892}
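
/* Worked example (assuming N_LINE_ARANGE == 64): a == 0x1010,
   len == 0x200.  Then aligned_start == 0x1040 and after_start ==
   0x1200, giving before_len == 0x30, aligned_len == 0x1C0 and
   after_len == 0x10 (0x30 + 0x1C0 + 0x10 == 0x200).  The two partial
   pieces go through zsm_sset_range_SMALL, hence through the cache;
   the seven whole lines in the middle are overwritten in-cache on a
   tag hit, or else rewritten directly in compressed LineZ form with
   a single dict entry covering the whole line. */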
5893
5894
5895/////////////////////////////////////////////////////////
5896// //
sewardj23f12002009-07-24 08:45:08 +00005897// Front-filtering accesses //
5898// //
5899/////////////////////////////////////////////////////////
5900
5901static UWord stats__f_ac = 0;
5902static UWord stats__f_sk = 0;
5903
5904#if 0
5905# define STATS__F_SHOW \
5906 do { \
5907 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5908 VG_(printf)("filters: ac %lu sk %lu\n", \
5909 stats__f_ac, stats__f_sk); \
5910 } while (0)
5911#else
5912# define STATS__F_SHOW /* */
5913#endif
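
/* Illustrative call pattern (a sketch, assuming a cold filter to
   start with): two back-to-back writes by one thread to the same
   word.  The first call misses the filter and falls through to the
   full state machine; the second hits and returns after bumping
   stats__f_sk, until a later synchronisation event (or an explicit
   Filter__clear) invalidates the filter entry again.

      zsm_sapply32_f__msmcwrite( thr, a );  // miss: full msmcwrite runs
      zsm_sapply32_f__msmcwrite( thr, a );  // hit: skipped
*/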
5914
5915void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5916 stats__f_ac++;
5917 STATS__F_SHOW;
5918 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5919 stats__f_sk++;
5920 return;
5921 }
5922 zsm_sapply08__msmcwrite(thr, a);
5923}
5924
5925void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5926 stats__f_ac++;
5927 STATS__F_SHOW;
5928 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5929 stats__f_sk++;
5930 return;
5931 }
5932 zsm_sapply16__msmcwrite(thr, a);
5933}
5934
5935void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5936 stats__f_ac++;
5937 STATS__F_SHOW;
5938 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5939 stats__f_sk++;
5940 return;
5941 }
5942 zsm_sapply32__msmcwrite(thr, a);
5943}
5944
5945void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5946 stats__f_ac++;
5947 STATS__F_SHOW;
5948 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5949 stats__f_sk++;
5950 return;
5951 }
5952 zsm_sapply64__msmcwrite(thr, a);
5953}
5954
5955void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5956{
5957 /* fast track a couple of common cases */
5958 if (len == 4 && aligned32(a)) {
5959 zsm_sapply32_f__msmcwrite( thr, a );
5960 return;
5961 }
5962 if (len == 8 && aligned64(a)) {
5963 zsm_sapply64_f__msmcwrite( thr, a );
5964 return;
5965 }
5966
5967 /* be completely general (but as efficient as possible) */
5968 if (len == 0) return;
5969
5970 if (!aligned16(a) && len >= 1) {
5971 zsm_sapply08_f__msmcwrite( thr, a );
5972 a += 1;
5973 len -= 1;
5974 tl_assert(aligned16(a));
5975 }
5976 if (len == 0) return;
5977
5978 if (!aligned32(a) && len >= 2) {
5979 zsm_sapply16_f__msmcwrite( thr, a );
5980 a += 2;
5981 len -= 2;
5982 tl_assert(aligned32(a));
5983 }
5984 if (len == 0) return;
5985
5986 if (!aligned64(a) && len >= 4) {
5987 zsm_sapply32_f__msmcwrite( thr, a );
5988 a += 4;
5989 len -= 4;
5990 tl_assert(aligned64(a));
5991 }
5992 if (len == 0) return;
5993
5994 if (len >= 8) {
5995 tl_assert(aligned64(a));
5996 while (len >= 8) {
5997 zsm_sapply64_f__msmcwrite( thr, a );
5998 a += 8;
5999 len -= 8;
6000 }
6001 tl_assert(aligned64(a));
6002 }
6003 if (len == 0) return;
6004
6005 if (len >= 4)
6006 tl_assert(aligned32(a));
6007 if (len >= 4) {
6008 zsm_sapply32_f__msmcwrite( thr, a );
6009 a += 4;
6010 len -= 4;
6011 }
6012 if (len == 0) return;
6013
6014 if (len >= 2)
6015 tl_assert(aligned16(a));
6016 if (len >= 2) {
6017 zsm_sapply16_f__msmcwrite( thr, a );
6018 a += 2;
6019 len -= 2;
6020 }
6021 if (len == 0) return;
6022
6023 if (len >= 1) {
6024 zsm_sapply08_f__msmcwrite( thr, a );
6025 //a += 1;
6026 len -= 1;
6027 }
6028 tl_assert(len == 0);
6029}
6030
6031void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6032 stats__f_ac++;
6033 STATS__F_SHOW;
6034 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6035 stats__f_sk++;
6036 return;
6037 }
6038 zsm_sapply08__msmcread(thr, a);
6039}
6040
6041void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6042 stats__f_ac++;
6043 STATS__F_SHOW;
6044 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6045 stats__f_sk++;
6046 return;
6047 }
6048 zsm_sapply16__msmcread(thr, a);
6049}
6050
6051void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6052 stats__f_ac++;
6053 STATS__F_SHOW;
6054 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6055 stats__f_sk++;
6056 return;
6057 }
6058 zsm_sapply32__msmcread(thr, a);
6059}
6060
6061void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6062 stats__f_ac++;
6063 STATS__F_SHOW;
6064 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6065 stats__f_sk++;
6066 return;
6067 }
6068 zsm_sapply64__msmcread(thr, a);
6069}
6070
6071void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6072{
6073 /* fast track a couple of common cases */
6074 if (len == 4 && aligned32(a)) {
6075 zsm_sapply32_f__msmcread( thr, a );
6076 return;
6077 }
6078 if (len == 8 && aligned64(a)) {
6079 zsm_sapply64_f__msmcread( thr, a );
6080 return;
6081 }
6082
6083 /* be completely general (but as efficient as possible) */
6084 if (len == 0) return;
6085
6086 if (!aligned16(a) && len >= 1) {
6087 zsm_sapply08_f__msmcread( thr, a );
6088 a += 1;
6089 len -= 1;
6090 tl_assert(aligned16(a));
6091 }
6092 if (len == 0) return;
6093
6094 if (!aligned32(a) && len >= 2) {
6095 zsm_sapply16_f__msmcread( thr, a );
6096 a += 2;
6097 len -= 2;
6098 tl_assert(aligned32(a));
6099 }
6100 if (len == 0) return;
6101
6102 if (!aligned64(a) && len >= 4) {
6103 zsm_sapply32_f__msmcread( thr, a );
6104 a += 4;
6105 len -= 4;
6106 tl_assert(aligned64(a));
6107 }
6108 if (len == 0) return;
6109
6110 if (len >= 8) {
6111 tl_assert(aligned64(a));
6112 while (len >= 8) {
6113 zsm_sapply64_f__msmcread( thr, a );
6114 a += 8;
6115 len -= 8;
6116 }
6117 tl_assert(aligned64(a));
6118 }
6119 if (len == 0) return;
6120
6121 if (len >= 4)
6122 tl_assert(aligned32(a));
6123 if (len >= 4) {
6124 zsm_sapply32_f__msmcread( thr, a );
6125 a += 4;
6126 len -= 4;
6127 }
6128 if (len == 0) return;
6129
6130 if (len >= 2)
6131 tl_assert(aligned16(a));
6132 if (len >= 2) {
6133 zsm_sapply16_f__msmcread( thr, a );
6134 a += 2;
6135 len -= 2;
6136 }
6137 if (len == 0) return;
6138
6139 if (len >= 1) {
6140 zsm_sapply08_f__msmcread( thr, a );
6141 //a += 1;
6142 len -= 1;
6143 }
6144 tl_assert(len == 0);
6145}
6146
6147void libhb_Thr_resumes ( Thr* thr )
6148{
6149 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006150 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006151 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00006152 Filter__clear(thr->filter, "libhb_Thr_resumes");
6153 /* A kludge, but .. if this thread doesn't have any marker stacks
6154 at all, get one right now. This is easier than figuring out
6155 exactly when at thread startup we can and can't take a stack
6156 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00006157 if (HG_(clo_history_level) == 1) {
6158 tl_assert(thr->local_Kws_n_stacks);
6159 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6160 note_local_Kw_n_stack_for(thr);
6161 }
sewardj23f12002009-07-24 08:45:08 +00006162}
6163
6164
6165/////////////////////////////////////////////////////////
6166// //
sewardjf98e1c02008-10-25 16:22:41 +00006167// Synchronisation objects //
6168// //
6169/////////////////////////////////////////////////////////
6170
sewardjffce8152011-06-24 10:09:41 +00006171/* A double linked list of all the SO's. */
6172SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006173
sewardjffce8152011-06-24 10:09:41 +00006174static SO* SO__Alloc ( void )
6175{
sewardjf98e1c02008-10-25 16:22:41 +00006176 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6177 so->viR = VtsID_INVALID;
6178 so->viW = VtsID_INVALID;
6179 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006180 /* Add to double linked list */
6181 if (admin_SO) {
6182 tl_assert(admin_SO->admin_prev == NULL);
6183 admin_SO->admin_prev = so;
6184 so->admin_next = admin_SO;
6185 } else {
6186 so->admin_next = NULL;
6187 }
6188 so->admin_prev = NULL;
6189 admin_SO = so;
6190 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006191 return so;
6192}
sewardjffce8152011-06-24 10:09:41 +00006193
6194static void SO__Dealloc ( SO* so )
6195{
sewardjf98e1c02008-10-25 16:22:41 +00006196 tl_assert(so);
6197 tl_assert(so->magic == SO_MAGIC);
6198 if (so->viR == VtsID_INVALID) {
6199 tl_assert(so->viW == VtsID_INVALID);
6200 } else {
6201 tl_assert(so->viW != VtsID_INVALID);
6202 VtsID__rcdec(so->viR);
6203 VtsID__rcdec(so->viW);
6204 }
6205 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006206 /* Del from double linked list */
6207 if (so->admin_prev)
6208 so->admin_prev->admin_next = so->admin_next;
6209 if (so->admin_next)
6210 so->admin_next->admin_prev = so->admin_prev;
6211 if (so == admin_SO)
6212 admin_SO = so->admin_next;
6213 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006214 HG_(free)( so );
6215}
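
/* A minimal sketch (illustrative only; 'example_count_live_SOs' is a
   hypothetical name, and the block is disabled in the usual way for
   this file): since SO__Alloc pushes new SOs at the head of the admin
   list, walking admin_SO via admin_next visits every live SO, newest
   first. */
#if 0
static UWord example_count_live_SOs ( void )
{
   UWord n = 0;
   for (SO* so = admin_SO; so != NULL; so = so->admin_next) {
      tl_assert(so->magic == SO_MAGIC);
      n++;
   }
   return n;
}
#endif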
6216
6217
6218/////////////////////////////////////////////////////////
6219// //
6220// Top Level API //
6221// //
6222/////////////////////////////////////////////////////////
6223
florian6bd9dc12012-11-23 16:17:43 +00006224static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006225{
6226 if (1) return;
6227 if (t->viR == t->viW) {
6228 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6229 VtsID__pp( t->viR );
6230 VG_(printf)("%s","\n");
6231 } else {
6232 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6233 VtsID__pp( t->viR );
6234 VG_(printf)(" viW %u==", t->viW);
6235 VtsID__pp( t->viW );
6236 VG_(printf)("%s","\n");
6237 }
6238}
6239
6240
6241Thr* libhb_init (
6242 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006243 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006244 )
6245{
6246 Thr* thr;
6247 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006248
6249   // We will have to store a large number of these,
6250 // so make sure they're the size we expect them to be.
6251 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006252
6253   /* because the first 1024 ThrIDs are unusable */
6254 tl_assert(SCALARTS_N_THRBITS >= 11);
6255 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6256 Thr_n_RCEC). */
6257 tl_assert(SCALARTS_N_THRBITS <= 29);
6258
6259 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6260 (32-bit). It's not correctness-critical, but there are a lot of
6261 them, so it's important from a space viewpoint. Unfortunately
6262 we simply can't pack it into 2 words on a 32-bit target. */
6263 if (sizeof(UWord) == 8) {
6264 tl_assert(sizeof(Thr_n_RCEC) == 16);
6265 } else {
6266 tl_assert(sizeof(Thr_n_RCEC) == 12);
6267 }
6268
6269 /* Word sets really are 32 bits. Even on a 64 bit target. */
6270 tl_assert(sizeof(WordSetID) == 4);
6271 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006272
sewardjf98e1c02008-10-25 16:22:41 +00006273 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006274 tl_assert(get_EC);
6275 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006276 main_get_EC = get_EC;
6277
6278 // No need to initialise hg_wordfm.
6279 // No need to initialise hg_wordset.
6280
sewardj7aa38a92011-02-27 23:04:12 +00006281 /* Allocated once and never deallocated. Used as a temporary in
6282 VTS singleton, tick and join operations. */
6283 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6284 temp_max_sized_VTS->id = VtsID_INVALID;
philippec3508652015-03-28 12:01:58 +00006285 verydead_thread_tables_init();
sewardjf98e1c02008-10-25 16:22:41 +00006286 vts_set_init();
6287 vts_tab_init();
6288 event_map_init();
6289 VtsID__invalidate_caches();
6290
6291 // initialise shadow memory
philippe1475a7f2015-05-11 19:45:08 +00006292 zsm_init( );
sewardjf98e1c02008-10-25 16:22:41 +00006293
6294 thr = Thr__new();
6295 vi = VtsID__mk_Singleton( thr, 1 );
6296 thr->viR = vi;
6297 thr->viW = vi;
6298 VtsID__rcinc(thr->viR);
6299 VtsID__rcinc(thr->viW);
6300
6301 show_thread_state(" root", thr);
6302 return thr;
6303}
6304
sewardj23f12002009-07-24 08:45:08 +00006305
sewardjf98e1c02008-10-25 16:22:41 +00006306Thr* libhb_create ( Thr* parent )
6307{
6308 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6309 the child's index. Since the child's index is guaranteed
6310 unique, it has never been seen before, so the implicit value
6311 before the tick is zero and after that is one. */
6312 Thr* child = Thr__new();
6313
6314 child->viR = VtsID__tick( parent->viR, child );
6315 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006316 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006317 VtsID__rcinc(child->viR);
6318 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006319 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006320 early for that - it may not have a valid TId yet. So, let
6321 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006322
6323 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6324 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6325
6326 /* and the parent has to move along too */
6327 VtsID__rcdec(parent->viR);
6328 VtsID__rcdec(parent->viW);
6329 parent->viR = VtsID__tick( parent->viR, parent );
6330 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006331 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006332 VtsID__rcinc(parent->viR);
6333 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006334 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006335
6336 show_thread_state(" child", child);
6337 show_thread_state("parent", parent);
6338
6339 return child;
6340}
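
/* Worked example (clock notation only; the real VTSs live behind
   VtsIDs): if the parent's clocks are { P:5 } just before the create,
   the child ends up with { P:5, C:1 } and the parent moves on to
   { P:6 }.  This is why the two indexAt assertions above expect
   exactly 1. */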
6341
6342/* Shut down the library, and print stats (in fact that's _all_
6343   this is for.) */
6344void libhb_shutdown ( Bool show_stats )
6345{
6346 if (show_stats) {
6347 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6348 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6349 stats__secmaps_allocd,
6350 stats__secmap_ga_space_covered);
6351 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6352 stats__secmap_linesZ_allocd,
6353 stats__secmap_linesZ_bytes);
philippe0fb30ac2015-05-15 13:17:17 +00006354 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6355 " (%'10lu used)\n",
philippe71ed3c92015-05-17 19:32:42 +00006356 VG_(sizePA) (LineF_pool_allocator),
6357 VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
philippe0fb30ac2015-05-15 13:17:17 +00006358 shmem__SecMap_used_linesF());
philippef54cb662015-05-10 22:19:31 +00006359 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6360 " #%lu scanGC \n",
6361 stats__secmaps_in_map_shmem,
6362                   shmem__SecMap_do_GC(False /* count only, don't GC */),
6363 stats__secmaps_scanGC);
6364 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6365 VG_(printf)(" secmaps: %'10lu in freelist,"
6366 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6367 SecMap_freelist_length(),
6368 stats__secmaps_scanGCed,
6369 stats__secmaps_ssetGCed);
sewardjf98e1c02008-10-25 16:22:41 +00006370 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6371 stats__secmaps_search, stats__secmaps_search_slow);
6372
6373 VG_(printf)("%s","\n");
6374 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6375 stats__cache_totrefs, stats__cache_totmisses );
6376 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6377 stats__cache_Z_fetches, stats__cache_F_fetches );
6378 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6379 stats__cache_Z_wbacks, stats__cache_F_wbacks );
philippef54cb662015-05-10 22:19:31 +00006380 VG_(printf)(" cache: %'14lu flushes_invals\n",
6381 stats__cache_flushes_invals );
sewardjf98e1c02008-10-25 16:22:41 +00006382 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6383 stats__cache_make_New_arange,
6384 stats__cache_make_New_inZrep);
6385
6386 VG_(printf)("%s","\n");
6387 VG_(printf)(" cline: %'10lu normalises\n",
6388 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006389 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6390 stats__cline_cread64s,
6391 stats__cline_cread32s,
6392 stats__cline_cread16s,
6393 stats__cline_cread08s );
6394 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6395 stats__cline_cwrite64s,
6396 stats__cline_cwrite32s,
6397 stats__cline_cwrite16s,
6398 stats__cline_cwrite08s );
6399 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6400 stats__cline_swrite64s,
6401 stats__cline_swrite32s,
6402 stats__cline_swrite16s,
6403 stats__cline_swrite08s );
6404 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6405 stats__cline_sread08s, stats__cline_scopy08s );
philippef54cb662015-05-10 22:19:31 +00006406 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6407 " 2to1 %'12lu\n",
6408 stats__cline_64to32splits, stats__cline_32to16splits,
6409 stats__cline_16to8splits );
6410 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6411 " 2to1 %'12lu\n",
6412 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6413 stats__cline_16to8pulldown );
sewardjf98e1c02008-10-25 16:22:41 +00006414 if (0)
philippef54cb662015-05-10 22:19:31 +00006415 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6416 " covers %ld bytes of arange\n",
6417 (Word)sizeof(LineZ),
6418 (Word)N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00006419
6420 VG_(printf)("%s","\n");
6421
sewardjc8028ad2010-05-05 09:34:42 +00006422 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006423 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006424 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006425 stats__msmcwrite, stats__msmcwrite_change);
6426 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6427 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006428 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6429 stats__join2_queries, stats__join2_misses);
6430
6431 VG_(printf)("%s","\n");
philippef54cb662015-05-10 22:19:31 +00006432 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6433 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6434 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6435 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6436 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6437 " (%'lu allocd)\n",
sewardj7aa38a92011-02-27 23:04:12 +00006438 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006439 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6440 stats__vts__indexat_slow );
6441
6442 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006443 VG_(printf)(
6444 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6445 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6446 );
philippe2bd23262015-05-11 20:56:49 +00006447 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6448 stats__vts_tab_GC, stats__vts_pruning);
sewardjf98e1c02008-10-25 16:22:41 +00006449 VG_(printf)( " libhb: %lu entries in vts_set\n",
6450 VG_(sizeFM)( vts_set ) );
6451
6452 VG_(printf)("%s","\n");
philippe900c5352015-03-24 14:02:44 +00006453 {
6454 UInt live = 0;
6455 UInt llexit_done = 0;
6456 UInt joinedwith_done = 0;
6457 UInt llexit_and_joinedwith_done = 0;
6458
6459 Thread* hgthread = get_admin_threads();
6460 tl_assert(hgthread);
6461 while (hgthread) {
6462 Thr* hbthr = hgthread->hbthr;
6463 tl_assert(hbthr);
6464 if (hbthr->llexit_done && hbthr->joinedwith_done)
6465 llexit_and_joinedwith_done++;
6466 else if (hbthr->llexit_done)
6467 llexit_done++;
6468 else if (hbthr->joinedwith_done)
6469 joinedwith_done++;
6470 else
6471 live++;
6472 hgthread = hgthread->admin;
6473 }
6474 VG_(printf)(" libhb: threads live: %d exit_and_joinedwith %d"
6475 " exit %d joinedwith %d\n",
6476 live, llexit_and_joinedwith_done,
6477 llexit_done, joinedwith_done);
philippec3508652015-03-28 12:01:58 +00006478 VG_(printf)(" libhb: %d verydead_threads, "
6479 "%d verydead_threads_not_pruned\n",
6480 (int) VG_(sizeXA)( verydead_thread_table),
6481 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6482 tl_assert (VG_(sizeXA)( verydead_thread_table)
6483 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6484 == llexit_and_joinedwith_done);
philippe900c5352015-03-24 14:02:44 +00006485 }
6486
6487 VG_(printf)("%s","\n");
philipped005b2c2015-04-21 21:58:14 +00006488 {
6489 UWord OldRef_accs_n[N_OLDREF_ACCS+1];
6490 UInt accs_n;
6491 UWord OldRef_n;
6492 UInt i;
6493
6494 OldRef_n = 0;
6495 for (i = 0; i <= N_OLDREF_ACCS; i++)
6496 OldRef_accs_n[i] = 0;
6497
6498 for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
6499 OldRef_n++;
6500 accs_n = 0;
6501 for (i = 0; i < N_OLDREF_ACCS; i++) {
6502 if (o->accs[i].thrid != 0)
6503 accs_n++;
6504 }
6505 OldRef_accs_n[accs_n]++;
6506 }
6507
6508 tl_assert(OldRef_n == oldrefTreeN);
6509 VG_(printf)( " libhb: oldrefTreeN %lu ", oldrefTreeN);
6510 VG_(printf)( "( ");
6511 for (i = 0; i <= N_OLDREF_ACCS; i++)
6512 VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
6513 VG_(printf)( ")\n");
6514 }
sewardjf98e1c02008-10-25 16:22:41 +00006515 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6516 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6517 stats__ctxt_rcdec2,
6518 stats__ctxt_rcdec3 );
6519 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6520 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
philippecabdbb52015-04-20 21:33:16 +00006521 VG_(printf)( " libhb: contextTab: %lu slots,"
6522 " %lu cur ents(ref'd %lu),"
philippe06bc23a2015-04-17 21:19:43 +00006523 " %lu max ents\n",
sewardjf98e1c02008-10-25 16:22:41 +00006524 (UWord)N_RCEC_TAB,
philippecabdbb52015-04-20 21:33:16 +00006525 stats__ctxt_tab_curr, RCEC_referenced,
6526 stats__ctxt_tab_max );
philippe47124e92015-04-25 14:00:24 +00006527 {
6528# define MAXCHAIN 10
6529 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6530 UInt non0chain = 0;
6531 UInt n;
6532 UInt i;
6533 RCEC *p;
6534
6535 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6536 for (i = 0; i < N_RCEC_TAB; i++) {
6537 n = 0;
6538 for (p = contextTab[i]; p; p = p->next)
6539 n++;
6540 if (n < MAXCHAIN)
6541 chains[n]++;
6542 else
6543 chains[MAXCHAIN]++;
6544 if (n > 0)
6545 non0chain++;
6546 }
6547 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6548 " Avg chain len %3.1f\n"
6549 " ",
6550 (Double)stats__ctxt_tab_curr
6551 / (Double)(non0chain ? non0chain : 1));
6552 for (i = 0; i <= MAXCHAIN; i++) {
6553 if (chains[i] != 0)
6554 VG_(printf)( "[%d%s]=%d ",
6555 i, i == MAXCHAIN ? "+" : "",
6556 chains[i]);
6557 }
6558 VG_(printf)( "\n");
6559# undef MAXCHAIN
6560 }
sewardjf98e1c02008-10-25 16:22:41 +00006561 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6562 stats__ctxt_tab_qs,
6563 stats__ctxt_tab_cmps );
6564#if 0
6565 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6566 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6567 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6568 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6569 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6570 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6571 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6572 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6573 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6574 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6575 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6576 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6577 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6578 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6579
6580 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6581 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6582 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6583 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6584#endif
6585
6586 VG_(printf)("%s","<<< END libhb stats >>>\n");
6587 VG_(printf)("%s","\n");
6588
6589 }
6590}
6591
sewardjffce8152011-06-24 10:09:41 +00006592/* Receive notification that a thread has low level exited. The
6593 significance here is that we do not expect to see any more memory
6594 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006595void libhb_async_exit ( Thr* thr )
6596{
sewardj23f12002009-07-24 08:45:08 +00006597 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006598 tl_assert(!thr->llexit_done);
6599 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006600
6601 /* free up Filter and local_Kws_n_stacks (well, actually not the
6602 latter ..) */
6603 tl_assert(thr->filter);
6604 HG_(free)(thr->filter);
6605 thr->filter = NULL;
6606
sewardjffce8152011-06-24 10:09:41 +00006607 /* Tell the VTS mechanism this thread has exited, so it can
6608 participate in VTS pruning. Note this can only happen if the
6609      thread has both ll_exited and been joined with. */
6610 if (thr->joinedwith_done)
6611 VTS__declare_thread_very_dead(thr);
6612
sewardj2d2ea2f2009-08-02 10:15:07 +00006613 /* Another space-accuracy tradeoff. Do we want to be able to show
6614 H1 history for conflicts in threads which have since exited? If
6615 yes, then we better not free up thr->local_Kws_n_stacks. The
6616 downside is a potential per-thread leak of up to
6617 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6618 XArray average overcommit factor is (1.5 I'd guess). */
6619 // hence:
6620 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6621 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006622}
6623
sewardjffce8152011-06-24 10:09:41 +00006624/* Receive notification that a thread has been joined with. The
6625 significance here is that we do not expect to see any further
6626 references to its vector clocks (Thr::viR and Thr::viW). */
6627void libhb_joinedwith_done ( Thr* thr )
6628{
6629 tl_assert(thr);
6630 /* Caller must ensure that this is only ever called once per Thr. */
6631 tl_assert(!thr->joinedwith_done);
6632 thr->joinedwith_done = True;
6633 if (thr->llexit_done)
6634 VTS__declare_thread_very_dead(thr);
6635}
6636
6637
sewardjf98e1c02008-10-25 16:22:41 +00006638/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6639 a Seg that points at a VTS is its one-and-only owner, and ditto for
6640 a SO that points at a VTS. */
6641
6642SO* libhb_so_alloc ( void )
6643{
6644 return SO__Alloc();
6645}
6646
6647void libhb_so_dealloc ( SO* so )
6648{
6649 tl_assert(so);
6650 tl_assert(so->magic == SO_MAGIC);
6651 SO__Dealloc(so);
6652}
6653
6654/* See comments in libhb.h for details on the meaning of
6655 strong vs weak sends and strong vs weak receives. */
6656void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6657{
6658 /* Copy the VTSs from 'thr' into the sync object, and then move
6659 the thread along one step. */
6660
6661 tl_assert(so);
6662 tl_assert(so->magic == SO_MAGIC);
6663
6664 /* stay sane .. a thread's read-clock must always lead or be the
6665 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006666 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6667 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006668 }
6669
6670 /* since we're overwriting the VtsIDs in the SO, we need to drop
6671 any references made by the previous contents thereof */
6672 if (so->viR == VtsID_INVALID) {
6673 tl_assert(so->viW == VtsID_INVALID);
6674 so->viR = thr->viR;
6675 so->viW = thr->viW;
6676 VtsID__rcinc(so->viR);
6677 VtsID__rcinc(so->viW);
6678 } else {
6679 /* In a strong send, we dump any previous VC in the SO and
6680 install the sending thread's VC instead. For a weak send we
6681 must join2 with what's already there. */
6682 tl_assert(so->viW != VtsID_INVALID);
6683 VtsID__rcdec(so->viR);
6684 VtsID__rcdec(so->viW);
6685 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6686 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6687 VtsID__rcinc(so->viR);
6688 VtsID__rcinc(so->viW);
6689 }
6690
6691 /* move both parent clocks along */
6692 VtsID__rcdec(thr->viR);
6693 VtsID__rcdec(thr->viW);
6694 thr->viR = VtsID__tick( thr->viR, thr );
6695 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006696 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006697 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006698 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006699 }
sewardjf98e1c02008-10-25 16:22:41 +00006700 VtsID__rcinc(thr->viR);
6701 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006702
sewardjf98e1c02008-10-25 16:22:41 +00006703 if (strong_send)
6704 show_thread_state("s-send", thr);
6705 else
6706 show_thread_state("w-send", thr);
6707}
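
/* Worked example: suppose thr->viW == { T1:4, T2:7 } and the SO
   already carries viW == { T1:6, T2:2 }.  A strong send overwrites,
   leaving the SO with { T1:4, T2:7 }; a weak send joins pointwise,
   leaving it with { T1:6, T2:7 }.  Either way thr then ticks its own
   clocks, so the send itself happens-before anything that later
   receives from the SO. */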
6708
6709void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6710{
6711 tl_assert(so);
6712 tl_assert(so->magic == SO_MAGIC);
6713
6714 if (so->viR != VtsID_INVALID) {
6715 tl_assert(so->viW != VtsID_INVALID);
6716
6717 /* Weak receive (basically, an R-acquisition of a R-W lock).
6718 This advances the read-clock of the receiver, but not the
6719 write-clock. */
6720 VtsID__rcdec(thr->viR);
6721 thr->viR = VtsID__join2( thr->viR, so->viR );
6722 VtsID__rcinc(thr->viR);
6723
sewardj90eb22e2009-07-28 20:22:18 +00006724 /* At one point (r10589) it seemed safest to tick the clocks for
6725 the receiving thread after the join. But on reflection, I
6726 wonder if that might cause it to 'overtake' constraints,
6727 which could lead to missing races. So, back out that part of
6728 r10589. */
6729 //VtsID__rcdec(thr->viR);
6730 //thr->viR = VtsID__tick( thr->viR, thr );
6731 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006732
sewardjf98e1c02008-10-25 16:22:41 +00006733 /* For a strong receive, we also advance the receiver's write
6734 clock, which means the receive as a whole is essentially
6735 equivalent to a W-acquisition of a R-W lock. */
6736 if (strong_recv) {
6737 VtsID__rcdec(thr->viW);
6738 thr->viW = VtsID__join2( thr->viW, so->viW );
6739 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006740
sewardj90eb22e2009-07-28 20:22:18 +00006741 /* See comment just above, re r10589. */
6742 //VtsID__rcdec(thr->viW);
6743 //thr->viW = VtsID__tick( thr->viW, thr );
6744 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006745 }
6746
sewardjf4845dc2010-05-28 20:09:59 +00006747 if (thr->filter)
6748 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006749 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006750
sewardjf98e1c02008-10-25 16:22:41 +00006751 if (strong_recv)
6752 show_thread_state("s-recv", thr);
6753 else
6754 show_thread_state("w-recv", thr);
6755
6756 } else {
6757 tl_assert(so->viW == VtsID_INVALID);
6758 /* Deal with degenerate case: 'so' has no vts, so there has been
6759 no message posted to it. Just ignore this case. */
6760 show_thread_state("d-recv", thr);
6761 }
6762}
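
/* Worked example: if thr->viR == { T1:3 } and the SO carries
   viR == { T2:9 }, then after the receive thr->viR == { T1:3, T2:9 },
   ie everything the sender had observed now happens-before thr's
   subsequent reads.  A strong receive additionally performs the same
   join on viW, which is what makes it equivalent to a W-acquisition
   as described above. */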
6763
6764Bool libhb_so_everSent ( SO* so )
6765{
6766 if (so->viR == VtsID_INVALID) {
6767 tl_assert(so->viW == VtsID_INVALID);
6768 return False;
6769 } else {
6770 tl_assert(so->viW != VtsID_INVALID);
6771 return True;
6772 }
6773}
6774
6775#define XXX1 0 // 0x67a106c
6776#define XXX2 0
6777
sewardj23f12002009-07-24 08:45:08 +00006778static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006779 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6780 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6781 return False;
6782}
florian0c8a47c2013-10-01 20:10:21 +00006783static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006784{
sewardj23f12002009-07-24 08:45:08 +00006785 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006786 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6787 show_thread_state("", thr);
6788 VG_(printf)("%s","\n");
6789}
6790
sewardj23f12002009-07-24 08:45:08 +00006791void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006792{
6793 SVal sv = SVal__mkC(thr->viW, thr->viW);
6794 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006795 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6796 zsm_sset_range( a, szB, sv );
6797 Filter__clear_range( thr->filter, a, szB );
6798 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006799}
6800
sewardjfd35d492011-03-17 19:39:55 +00006801void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006802{
sewardj23f12002009-07-24 08:45:08 +00006803 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006804}
6805
philippef54cb662015-05-10 22:19:31 +00006806
6807/* Set the lines zix_start till zix_end to NOACCESS. */
6808static void zsm_secmap_line_range_noaccess (SecMap *sm,
6809 UInt zix_start, UInt zix_end)
6810{
6811 for (UInt lz = zix_start; lz <= zix_end; lz++) {
6812 LineZ* lineZ;
philippef54cb662015-05-10 22:19:31 +00006813 lineZ = &sm->linesZ[lz];
6814 if (lineZ->dict[0] != SVal_INVALID) {
6815 rcdec_LineZ(lineZ);
philippe71ed3c92015-05-17 19:32:42 +00006816 lineZ->dict[0] = SVal_NOACCESS;
6817 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
philippef54cb662015-05-10 22:19:31 +00006818 } else {
philippe71ed3c92015-05-17 19:32:42 +00006819 clear_LineF_of_Z(lineZ);
philippef54cb662015-05-10 22:19:31 +00006820 }
philippef54cb662015-05-10 22:19:31 +00006821 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6822 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6823 }
6824}
6825
6826/* Set the given range to SVal_NOACCESS in-place in the secmap.
6827 a must be cacheline aligned. len must be a multiple of a cacheline
6828 and must be < N_SECMAP_ARANGE. */
6829static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6830{
6831 tl_assert (is_valid_scache_tag (a));
6832 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6833 tl_assert (len < N_SECMAP_ARANGE);
6834
6835 SecMap *sm1 = shmem__find_SecMap (a);
6836 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6837 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
6838 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6839
6840 if (sm1) {
6841 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6842 zsm_secmap_line_range_noaccess (sm1, zix_start,
6843 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6844 }
6845 if (sm2 && sm1 != sm2) {
6846 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6847 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6848 }
6849}
6850
6851/* Set the given address range to SVal_NOACCESS.
6852 The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
6853static void zsm_sset_range_noaccess (Addr addr, SizeT len)
6854{
6855 /*
6856 BPC = Before, Partial Cacheline, = addr
6857 (i.e. starting inside a cacheline/inside a SecMap)
6858 BFC = Before, Full Cacheline(s), but not full SecMap
6859 (i.e. starting inside a SecMap)
6860 FSM = Full SecMap(s)
6861 (i.e. starting a SecMap)
6862 AFC = After, Full Cacheline(s), but not full SecMap
6863 (i.e. first address after the full SecMap(s))
6864      APC = After, Partial Cacheline (i.e. first address after the
6865            full CacheLines).
6866 ARE = After Range End = addr+len = first address not part of the range.
6867
6868 If addr starts a Cacheline, then BPC == BFC.
6869 If addr starts a SecMap, then BPC == BFC == FSM.
6870 If addr+len starts a SecMap, then APC == ARE == AFC
6871 If addr+len starts a Cacheline, then APC == ARE
6872 */
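   /* Worked example (assuming 64-byte cachelines and 8K SecMaps; the
      real values are N_LINE_ARANGE and N_SECMAP_ARANGE): addr ==
      0x10030, len == 0x4100, so ARE == 0x14130.  Then BFC == 0x10040,
      FSM == 0x12000, AFC == 0x14000 and APC == 0x14100, and the
      PlenCONSUME calls below yield BPClen == 0x10, BFClen == 0x1FC0,
      FSMlen == 0x2000, AFClen == 0x100 and APClen == 0x30, which sum
      to 0x4100 as required. */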
6873 Addr ARE = addr + len;
6874 Addr BPC = addr;
6875 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
6876 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
6877 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
6878 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
6879 SizeT Plen = len; // Plen will be split between the following:
6880 SizeT BPClen;
6881 SizeT BFClen;
6882 SizeT FSMlen;
6883 SizeT AFClen;
6884 SizeT APClen;
6885
6886 /* Consumes from Plen the nr of bytes between from and to.
6887 from and to must be aligned on a multiple of round.
6888 The length consumed will be a multiple of round, with
6889 a maximum of Plen. */
6890# define PlenCONSUME(from, to, round, consumed) \
6891 do { \
6892 if (from < to) { \
6893 if (to - from < Plen) \
6894 consumed = to - from; \
6895 else \
6896 consumed = ROUNDDN(Plen, round); \
6897 } else { \
6898 consumed = 0; \
6899 } \
6900 Plen -= consumed; } while (0)
6901
6902 PlenCONSUME(BPC, BFC, 1, BPClen);
6903 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
6904 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
6905 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
6906 PlenCONSUME(APC, ARE, 1, APClen);
6907
6908 if (0)
6909 VG_(printf) ("addr %p[%ld] ARE %p"
6910 " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
6911 " AFC %p[%ld] APC %p[%ld]\n",
6912 (void*)addr, len, (void*)ARE,
6913 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
6914 (void*)AFC, AFClen, (void*)APC, APClen);
6915
6916 tl_assert (Plen == 0);
6917
6918 /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
6919
6920 /* First we set the partial cachelines. This is done through the cache. */
6921 if (BPClen > 0)
6922 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
6923 if (APClen > 0)
6924 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
6925
6926 /* After this, we will not use the cache anymore. We will directly work
6927 in-place on the z shadow memory in SecMap(s).
6928 So, we invalidate the cachelines for the whole range we are setting
6929 to NOACCESS below. */
6930 shmem__invalidate_scache_range (BFC, APC - BFC);
6931
6932 if (BFClen > 0)
6933 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
6934 if (AFClen > 0)
6935 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
6936
6937 if (FSMlen > 0) {
6938 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
6939 free list. */
6940 Addr sm_start = FSM;
6941 while (sm_start < AFC) {
6942 SecMap *sm = shmem__find_SecMap (sm_start);
6943 if (sm) {
6944 Addr gaKey;
6945 SecMap *fm_sm;
6946
6947 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
6948 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
philippe71ed3c92015-05-17 19:32:42 +00006949 LineZ *lineZ = &sm->linesZ[lz];
6950 if (LIKELY(lineZ->dict[0] != SVal_INVALID))
6951 rcdec_LineZ(lineZ);
6952 else
6953 clear_LineF_of_Z(lineZ);
philippef54cb662015-05-10 22:19:31 +00006954 }
6955 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
6956 tl_assert (0);
6957 stats__secmaps_in_map_shmem--;
6958 tl_assert (gaKey == sm_start);
6959 tl_assert (sm == fm_sm);
6960 stats__secmaps_ssetGCed++;
6961 push_SecMap_on_freelist (sm);
6962 }
6963 sm_start += N_SECMAP_ARANGE;
6964 }
6965 tl_assert (sm_start == AFC);
6966
6967      /* The above loop might have kept copies of freed SecMaps in the
6968         smCache, so clear them. */
6969 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
6970 smCache[0].gaKey = 1;
6971 smCache[0].sm = NULL;
6972 }
6973 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
6974 smCache[1].gaKey = 1;
6975 smCache[1].sm = NULL;
6976 }
6977 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
6978 smCache[2].gaKey = 1;
6979 smCache[2].sm = NULL;
6980 }
6981 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
6982 }
6983}
6984
sewardjfd35d492011-03-17 19:39:55 +00006985void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6986{
6987 /* This really does put the requested range in NoAccess. It's
6988 expensive though. */
6989 SVal sv = SVal_NOACCESS;
6990 tl_assert(is_sane_SVal_C(sv));
philippef54cb662015-05-10 22:19:31 +00006991 if (LIKELY(szB < 2 * N_LINE_ARANGE))
6992 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
6993 else
6994 zsm_sset_range_noaccess (a, szB);
sewardjfd35d492011-03-17 19:39:55 +00006995 Filter__clear_range( thr->filter, a, szB );
6996}
6997
philippef54cb662015-05-10 22:19:31 +00006998/* Works byte at a time. Can be optimised if needed. */
6999UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
7000{
7001 UWord anr = 0; // nr of bytes addressable.
7002
7003 /* Get the accessibility of each byte. Pay attention to not
7004 create SecMap or LineZ when checking if a byte is addressable.
7005
7006 Note: this is used for client request. Performance deemed not critical.
7007 So for simplicity, we work byte per byte.
7008 Performance could be improved by working with full cachelines
7009 or with full SecMap, when reaching a cacheline or secmap boundary. */
7010 for (SizeT i = 0; i < len; i++) {
7011 SVal sv = SVal_INVALID;
7012 Addr b = a + i;
7013 Addr tag = b & ~(N_LINE_ARANGE - 1);
7014 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
7015 UWord cloff = get_cacheline_offset(b);
7016
7017      /* Note: we do not use get_cacheline(b), so as to avoid creating
7018         cachelines and/or SecMaps for non-addressable bytes. */
7019 if (tag == cache_shmem.tags0[wix]) {
7020 CacheLine copy = cache_shmem.lyns0[wix];
7021 /* We work on a copy of the cacheline, as we do not want to
7022 record the client request as a real read.
7023 The below is somewhat similar to zsm_sapply08__msmcread but
7024 avoids side effects on the cache. */
7025 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
7026 UWord tno = get_treeno(b);
7027 UShort descr = copy.descrs[tno];
7028 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
7029 SVal* tree = &copy.svals[tno << 3];
7030 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
7031 }
7032 sv = copy.svals[cloff];
7033 } else {
7034 /* Byte not found in the cacheline. Search for a SecMap. */
7035 SecMap *sm = shmem__find_SecMap(b);
7036 LineZ *lineZ;
7037 if (sm == NULL)
7038 sv = SVal_NOACCESS;
7039 else {
7040 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
7041 lineZ = &sm->linesZ[zix];
7042 if (lineZ->dict[0] == SVal_INVALID) {
philippe71ed3c92015-05-17 19:32:42 +00007043 LineF *lineF = SVal2Ptr(lineZ->dict[1]);
7044 sv = lineF->w64s[cloff];
philippef54cb662015-05-10 22:19:31 +00007045 } else {
7046 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7047 sv = lineZ->dict[ix];
7048 }
7049 }
7050 }
7051
7052 tl_assert (sv != SVal_INVALID);
7053 if (sv == SVal_NOACCESS) {
7054 if (abits)
7055 abits[i] = 0x00;
7056 } else {
7057 if (abits)
7058 abits[i] = 0xff;
7059 anr++;
7060 }
7061 }
7062
7063 return anr;
7064}
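
/* Usage sketch (illustrative only; 'example_query_abits' is a
   hypothetical caller, and the block is disabled as with the other
   demo code in this file): query the accessibility of 16 bytes.
   Passing abits == NULL just counts the addressable bytes without
   recording the per-byte map. */
#if 0
static void example_query_abits ( Addr a )
{
   UChar abits[16];
   UWord anr = libhb_srange_get_abits( a, abits, 16 );
   VG_(printf)( "%lu of 16 bytes addressable\n", anr );
}
#endif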
7065
7066
sewardj406bac82010-03-03 23:03:40 +00007067void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7068{
7069 SVal sv = SVal_NOACCESS;
7070 tl_assert(is_sane_SVal_C(sv));
7071 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
philippef54cb662015-05-10 22:19:31 +00007072 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7073 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7074 else
7075 zsm_sset_range_noaccess (a, szB);
sewardj406bac82010-03-03 23:03:40 +00007076 Filter__clear_range( thr->filter, a, szB );
7077 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7078}
7079
sewardj0b20a152011-03-10 21:34:21 +00007080Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00007081 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00007082 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007083}
7084
sewardj0b20a152011-03-10 21:34:21 +00007085void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00007086 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00007087 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007088}
7089
sewardj23f12002009-07-24 08:45:08 +00007090void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00007091{
sewardj23f12002009-07-24 08:45:08 +00007092 zsm_scopy_range(src, dst, len);
7093 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00007094}
7095
7096void libhb_maybe_GC ( void )
7097{
philippecabdbb52015-04-20 21:33:16 +00007098 /* GC the unreferenced (zero rc) RCECs when
philippee0829e02015-04-21 20:55:40 +00007099      (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
7100      that is mostly NULL ptrs)
7101      and (2) approaching the max nr of RCECs (since we have in any case
7102      at least that many RCECs in the pool allocator)
7103      Note: the margin lets us avoid a small but constant increase
7104      in the max nr of RCECs due to the fact that libhb_maybe_GC is
7105      not called when the current nr of RCECs exactly reaches the max.
7106      and (3) the nr of referenced RCECs is less than 75% of the total nr of RCECs.
7107      Keeping the nr of RCECs from growing too much keeps memory use low,
7108      and avoids having too many elements in the (fixed) contextTab hashtable.
7109 */
philippecabdbb52015-04-20 21:33:16 +00007110 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
philippee0829e02015-04-21 20:55:40 +00007111 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
philippef54cb662015-05-10 22:19:31 +00007112 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
philippecabdbb52015-04-20 21:33:16 +00007113 do_RCEC_GC();
philippe158404e2015-04-10 19:34:14 +00007114
philippef54cb662015-05-10 22:19:31 +00007115 /* If there are still no entries available (all the table entries are full),
7116      and we hit the threshold point, then do a GC */
7117 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7118 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7119 if (UNLIKELY (vts_tab_GC))
7120 vts_tab__do_GC( False/*don't show stats*/ );
7121
7122 /* scan GC the SecMaps when
7123 (1) no SecMap in the freelist
7124 and (2) the current nr of live secmaps exceeds the threshold. */
7125 if (UNLIKELY(SecMap_freelist == NULL
7126 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7127 // If we did a vts tab GC, then no need to flush the cache again.
7128 if (!vts_tab_GC)
7129 zsm_flush_cache();
7130 shmem__SecMap_do_GC(True);
7131 }
philippecabdbb52015-04-20 21:33:16 +00007132
7133 /* Check the reference counts (expensive) */
7134 if (CHECK_CEM)
7135 event_map__check_reference_counts();
sewardjf98e1c02008-10-25 16:22:41 +00007136}
7137
7138
7139/////////////////////////////////////////////////////////////////
7140/////////////////////////////////////////////////////////////////
7141// //
7142// SECTION END main library //
7143// //
7144/////////////////////////////////////////////////////////////////
7145/////////////////////////////////////////////////////////////////
7146
7147/*--------------------------------------------------------------------*/
7148/*--- end libhb_main.c ---*/
7149/*--------------------------------------------------------------------*/