/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
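
/* An illustrative sketch only, under the assumption (from the VtsID
   comment above) that two 30-bit VtsIDs sit side by side in the low
   60 bits of an SVal, leaving the top two bits for tag values such
   as SVal_INVALID and SVal_NOACCESS.  The real encoding is defined
   by SVal__mkC and friends later in this file; the function name
   here is hypothetical. */
#if 0
static inline SVal example_SVal_pack_two_VtsIDs ( VtsID rmin, VtsID wmin )
{
   tl_assert(rmin < (1u << 30) && wmin < (1u << 30));
   return (1ULL << 62) | ((ULong)rmin << 30) | (ULong)wmin;
}
#endif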


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
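
/* A minimal sketch (illustrative, not library code; the function name
   is hypothetical) of how a (thrid, tym) pair round-trips through the
   packed representation described above. */
#if 0
static void example_ScalarTS_roundtrip ( void )
{
   ScalarTS ts;
   tl_assert(sizeof(ScalarTS) == 8);  /* 18 + 46 bits pack into one ULong */
   ts.thrid = 1024;                   /* thrids are issued upwards from 1024 */
   ts.tym   = 12345;                  /* 46-bit timestamp tick count */
   tl_assert(ts.thrid >= 1024 && ts.thrid <= ThrID_MAX_VALID);
   tl_assert(ts.tym == 12345);
}
#endif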


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
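
/* An illustrative sketch (assumption, not library code; the helper
   names are hypothetical): testing the R bit, and setting the W bit,
   for one byte within the UShort that covers it. */
#if 0
static inline Bool example_fi_R_seen ( UShort u16, Addr a )
{
   UWord byteno = a & 7;                     /* which of the 8 bytes */
   return 0 != (u16 & (2 << (2 * byteno)));  /* R is the higher bit of the pair */
}
static inline UShort example_fi_note_W ( UShort u16, Addr a )
{
   UWord byteno = a & 7;
   return (UShort)(u16 | (1 << (2 * byteno))); /* W is the lower bit */
}
#endif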

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals
   stored herein.  It is important to understand, however, that due
   to internal caching, the reference counts are in general
   inaccurate, and can be both above or below the true reference
   count for an item.  In particular, the library may indicate that
   the reference count for an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered
   to be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range       ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range      ( Addr, Addr, SizeT );
static void zsm_flush_cache      ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}
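
/* A worked example of the wraparound trick (illustrative): with
   start == 0x1000 and szB == 0x100, the address a == 0x0FFF makes
   'a - start' wrap to a huge unsigned value, so the single comparison
   rejects it without a separate 'a >= start' test; a == 0x10FF gives
   0xFF < 0x100 and is accepted. */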

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
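
/* For example (illustrative): a tree whose 8 bytes are covered by one
   64-bit value has descriptor TREE_DESCR_64; splitting that into two
   32-bit halves gives TREE_DESCR_32_1 | TREE_DESCR_32_0; further
   splitting the low half into bytes 0 and 1 plus a 16-bit node for
   bytes 2..3 gives TREE_DESCR_32_1 | TREE_DESCR_16_1 | TREE_DESCR_8_1
   | TREE_DESCR_8_0. */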

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or (when its .dict[0] == SVal_INVALID) it holds in
   .dict[1] an index to the LineF in .linesF that holds the full
   representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
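
/* For example (illustrative): during a cacheline writeback that turns
   a direct Z rep into an F rep, find_Z_for_writing first rcdec's the
   rep currently holding the line, and once the new data is parked,
   cacheline_wback rcinc's the new F rep -- matching the transition
   table above. */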
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
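
/* E.g. (illustrative): setting every tags0[] entry to 1 makes
   is_valid_scache_tag fail for all ways, so the first access to any
   line misses, and the writeback logic knows not to write back a way
   that still carries the bogus tag. */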


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # of scan GCs done
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # 8-bit shadow reads
static UWord stats__cline_swrite08s      = 0; // # 8-bit shadow writes
static UWord stats__cline_swrite16s      = 0; // # 16-bit shadow writes
static UWord stats__cline_swrite32s      = 0; // # 32-bit shadow writes
static UWord stats__cline_swrite64s      = 0; // # 64-bit shadow writes
static UWord stats__cline_scopy08s       = 0; // # 8-bit shadow copies
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # of vts_tab GCs
static UWord stats__vts_pruning          = 0; // # of vts prunings

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become fully SVal_NOACCESS are inserted in a list
   of recycled SecMaps.  When a new SecMap is needed, a recycled SecMap
   will be used in preference to allocating a new SecMap. */
/* We make a linked list of SecMaps.  The linesF pointer is re-used to
   implement the linked list. */
static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = (SecMap*)sm->linesF;
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   sm->linesF = (LineF*)SecMap_freelist;
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = (SecMap*)sm->linesF;
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF = NULL;
   sm->linesF_size = 0;
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many could be GC-ed.
   If 'really' is True, actually GC those SecMaps. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         ok_to_GC = lineZ->dict[0] == SVal_INVALID
            || (lineZ->dict[0] == SVal_NOACCESS
                && !SVal__isC (lineZ->dict[1])
                && !SVal__isC (lineZ->dict[2])
                && !SVal__isC (lineZ->dict[3]));
      }
      /* Deal with the LineFs */
      for (i = 0; i < sm->linesF_size && ok_to_GC; i++) {
         LineF* lineF = &sm->linesF[i];
         if (!lineF->inUse)
            continue;
         for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
            ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr    fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS or
            not in use.  We just need to free the linesF. */
         if (sm->linesF_size > 0) {
            HG_(free)(sm->linesF);
            stats__secmap_linesF_allocd -= sm->linesF_size;
            stats__secmap_linesF_bytes  -= sm->linesF_size * sizeof(LineF);
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  In that case, allow 10% more
         to be allocated before GCing again.  This avoids doing a lot
         of costly GC for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
            perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         which allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}

/* Returns the nr of linesF which are in use.  Note: this scans
   the secmap wordFM, so it is to be used for statistics only. */
__attribute__((noinline))
static UWord shmem__SecMap_used_linesF(void)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord inUse = 0;
   UWord total = 0;

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);

      for (i = 0; i < sm->linesF_size; i++) {
         LineF* lineF = &sm->linesF[i];
         if (lineF->inUse)
            inUse++;
         total++;
      }
   }
   VG_(doneIterFM)( map_shmem );
   tl_assert (stats__secmap_linesF_allocd == total);

   return inUse;
}

/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
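
/* Illustrative usage (a sketch, not library code; the function name
   is hypothetical): writing dict index 2 for byte 5 of a line's
   ix2s[] array and reading it back. */
#if 0
static void example_twobit_roundtrip ( void )
{
   UChar arr[N_LINE_ARANGE/4];
   VG_(memset)(arr, 0, sizeof(arr));
   write_twobit_array(arr, 5, 2);   /* byte 5 now references dict[2] */
   tl_assert(read_twobit_array(arr, 5) == 2);
   tl_assert(read_twobit_array(arr, 4) == 0); /* neighbours untouched */
}
#endif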

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32       8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32       8 8 16 */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32       16 8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32       16 16 */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32       32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
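
/* For example (illustrative): if all eight leaves hold the same SVal,
   the three merge layers collapse the tree completely, leaving only
   tree[0] live, tree[1..7] == SVal_INVALID, and a returned descriptor
   of exactly TREE_DESCR_64. */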

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}
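
/* E.g. (illustrative): a tree with descriptor TREE_DESCR_32_0 |
   TREE_DESCR_16_2 | TREE_DESCR_16_3 sequentialises to three entries:
   {count 4, tree[0]}, {count 2, tree[4]}, {count 2, tree[6]}. */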

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}
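
/* The Z ("zipped") representation just constructed packs, for each of
   the N_LINE_ARANGE shadow values, a 2-bit index into the 4-entry
   dictionary lineZ->dict[], using write_twobit_array; cacheline_fetch
   below undoes this with read_twobit_array.  Both helpers are defined
   earlier in this file.  Purely as an illustrative sketch (and not a
   claim about the exact implementation), a packed 2-bit array can be
   realised like this:
*/
#if 0
static void example_write_twobit ( /*MOD*/UChar* arr, UWord ix, UWord b2 )
{
   UWord bix  = ix >> 2;      /* 4 two-bit fields per byte */
   UWord shft = 2 * (ix & 3); /* bit position within that byte */
   tl_assert(b2 <= 3);
   arr[bix] = (UChar)( (arr[bix] & ~(3u << shft)) | (b2 << shft) );
}
static UWord example_read_twobit ( const UChar* arr, UWord ix )
{
   UWord bix  = ix >> 2;
   UWord shft = 2 * (ix & 3);
   return (arr[bix] >> shft) & 3;
}
#endif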

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
         cl->svals[i] = lineZ->dict[ix];
         if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

/* Invalidate the cachelines corresponding to the given range, which
   must start and end on a cacheline boundary. */
static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
{
   Word wix;

   /* ga must be on a cacheline boundary. */
   tl_assert (is_valid_scache_tag (ga));
   /* szB must be a multiple of cacheline size. */
   tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));

   Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
   Word nwix = szB / N_LINE_ARANGE;

   if (nwix > N_WAY_NENT)
      nwix = N_WAY_NENT; // no need to check the same entry several times.

   for (wix = 0; wix < nwix; wix++) {
      if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
         cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
      ga_ix++;
      if (UNLIKELY(ga_ix == N_WAY_NENT))
         ga_ix = 0;
   }
}
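
/* Illustrative note (not from the original source): the loop above
   starts at the way index implied by 'ga' and wraps modulo
   N_WAY_NENT.  As a hypothetical worked example, if N_LINE_ARANGE
   were 64 and N_WAY_NENT were 8192 (the real values are fixed earlier
   in this file), invalidating a 1 MB range would cover 16384 lines;
   nwix is clamped to 8192, so each cache entry is inspected exactly
   once rather than twice. */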

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
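
/* Worked example (added for illustration; not part of the original
   source): the helpers above decompose an address into (tag, line
   offset, tree number, tree offset).  Supposing N_LINE_BITS == 6, so
   N_LINE_ARANGE == 64, the address 0x4013A7 splits as:
      tag                    = 0x4013A7 & ~63 = 0x401380
      get_cacheline_offset() = 0x4013A7 &  63 = 39  (byte 39 of the line)
      get_treeno()           = 39 >> 3        = 4   (fifth 8-byte tree)
      get_tree_offset()      = 0x4013A7 & 7   = 7   (last byte of that tree)
*/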

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
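
/* Worked example (added for illustration; not part of the original
   source): suppose a tree holds a single 64-bit value, ie
   descr == TREE_DESCR_64, and a byte-sized access lands at tree
   offset 5.  pulldown_to_8(tree, 5, descr) then cascades:
      pulldown_to_32(tree, 4, .): tree[4] = tree[0];
         descr = TREE_DESCR_32_1 | TREE_DESCR_32_0
      pulldown_to_16(tree, 4, .): tree[6] = tree[4];
         descr = TREE_DESCR_32_0 | TREE_DESCR_16_3 | TREE_DESCR_16_2
      pulldown_to_8 itself:       tree[5] = tree[4];
         descr = TREE_DESCR_32_0 | TREE_DESCR_16_3
                 | TREE_DESCR_8_5 | TREE_DESCR_8_4
   leaving a 32-bit leaf at offset 0, byte leaves at offsets 4 and 5,
   and a 16-bit leaf at offset 6, all carrying copies of the original
   value. */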


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   /* Invalidate all cache entries. */
   tl_assert(!is_valid_scache_tag(1));
   for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) tables.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.
   verydead_thread_table_not_pruned lists the identity of the threads
   that died since the previous round of pruning.
   Once pruning is done, these ThrIDs are added to verydead_thread_table.
   We don't actually need to keep the set of threads that have ever died --
   only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_tables_init ( void )
{
   tl_assert(!verydead_thread_table);
   tl_assert(!verydead_thread_table_not_pruned);
   verydead_thread_table
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.1",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   verydead_thread_table_not_pruned
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.2",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}

static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   UWord i;

   VG_(sortXA)( thrids );
   /* Sanity check: check for unique .sts.thr values. */
   UWord nBT = VG_(sizeXA)( thrids );
   if (nBT > 0) {
      ThrID thrid1, thrid2;
      thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
      for (i = 1; i < nBT; i++) {
         thrid1 = thrid2;
         thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
         tl_assert(thrid1 < thrid2);
      }
   }
   /* Ok, so the dead thread table thrids has unique and in-order keys. */
}

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.  The
   new array is sized exactly to hold the number of required elements.
   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS. */
static void VTS__show ( const VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}
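
/* Layout note (added for illustration; not part of the original
   source): VTS__new over-allocates by one ScalarTS and plants the
   sentinel 0x0ddC0ffeeBadF00d just past the last usable entry; the
   clone, subtract and delete routines below re-check it to catch
   overruns of the ts[] flexible array.  Hypothetically, for
   sizeTS == 3 the allocation looks like:

      | id | usedTS | sizeTS | ts[0] | ts[1] | ts[2] | sentinel |

   ie sizeof(VTS) + (3+1) * sizeof(ScalarTS) bytes in total. */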

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
       There are 3 possibilities:
       (a) there is no next entry (we used them all up already):
           add (me_thrid,1) to the output, and quit
       (b) there is a next entry, and its thrid > me_thrid:
           add (me_thrid,1) to the output, then copy the remaining entries
       (c) there is a next entry, and its thrid == me_thrid:
           copy it to the output but increment its timestamp value.
           Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
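
/* Worked example (added for illustration; not part of the original
   source): ticking vts = [1024:5 1026:7] on behalf of the thread with
   ThrID 1026 hits case (c): the 1024 entry is copied unchanged, then
   the 1026 entry is copied with its timestamp incremented, giving
   out = [1024:5 1026:8].  Ticking the same vts for ThrID 1025 instead
   hits case (b): out = [1024:5 1025:1 1026:7], one entry longer,
   which is why callers must pre-size 'out' for vts->usedTS + 1
   entries. */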


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
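
/* Worked example (added for illustration; not part of the original
   source): joining a = [1024:5 1026:1] with b = [1024:3 1025:7]
   enumerates ThrIDs in order and keeps the per-thread maximum:
      1024: max(5,3) = 5   (present in both, so ncommon == 1)
      1025: max(0,7) = 7   (implicit zero in a)
      1026: max(1,0) = 1   (implicit zero in b)
   giving out = [1024:5 1025:7 1026:1], with
   out->usedTS == 2 + 2 - 1 as the final assertion demands. */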


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
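
/* Worked example (added for illustration; not part of the original
   source): for a = [1024:2 1025:1] and b = [1024:2], the scan pairs
   (tyma,tymb) = (2,2) for ThrID 1024 and then (1,0) for ThrID 1025,
   b's entry being an implicit zero.  Since tyma > tymb at 1025,
   VTS__cmpLEQ(a,b) returns 1025: a is not <= b.  With the arguments
   swapped, every pair satisfies tyma <= tymb, so the result is 0 and
   LEQ holds. */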


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)  This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across likely
      differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
   Word i, n;
   tl_assert(vts);

   VG_(printf)("[");
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      const ScalarTS *st = &vts->ts[i];
      VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
   }
   VG_(printf)("]");
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ:  tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that.). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
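
/* Usage sketch (added for illustration; not part of the original
   source): this find-or-clone-and-add step is a classic interning
   (hash-consing) pattern.  A caller holding a candidate VTS in a
   hypothetical scratch buffer would use it roughly as follows:
*/
#if 0
   VTS* interned = NULL;
   Bool existed  = vts_set__find__or__clone_and_add( &interned, scratch );
   /* 'interned' now points at the canonical copy held in vts_set.
      'scratch' may be reused or freed regardless of 'existed', since
      on the False path a clone, not 'scratch' itself, was added. */
#endif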


/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for u.freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so u.freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      union {
         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
         VtsID remap;    /* used only during pruning, for used entries */
      } u;
      /* u.freelink only used when vts == NULL,
         u.remap only used when vts != NULL, during pruning. */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                         HG_(free), sizeof(VtsTE) );
   vts_tab_freelist = VtsID_INVALID;
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->u.freelink == VtsID_INVALID);
   ie->u.freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->u.freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.u.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}
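
/* Illustrative note (not from the original source): the free list is
   threaded through the table entries themselves via u.freelink, so it
   needs no storage of its own.  For example, after entries 7 and then
   3 are freed, vts_tab_freelist == 3, entry 3's u.freelink == 7, and
   entry 7's u.freelink == VtsID_INVALID; get_from_free_list() then
   hands back 3 first, ie the list behaves as a LIFO stack. */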
2836
2837
2838/* Indirect callback from lib_zsm. */
2839static void VtsID__rcinc ( VtsID ii )
2840{
2841 VtsTE* ie;
2842 /* VG_(indexXA) does a range check for us */
2843 ie = VG_(indexXA)( vts_tab, ii );
2844 tl_assert(ie->vts); /* else it's not in use */
2845 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2846 tl_assert(ie->vts->id == ii);
2847 ie->rc++;
2848}
2849
2850/* Indirect callback from lib_zsm. */
2851static void VtsID__rcdec ( VtsID ii )
2852{
2853 VtsTE* ie;
2854 /* VG_(indexXA) does a range check for us */
2855 ie = VG_(indexXA)( vts_tab, ii );
2856 tl_assert(ie->vts); /* else it's not in use */
2857 tl_assert(ie->rc > 0); /* else RC snafu */
2858 tl_assert(ie->vts->id == ii);
2859 ie->rc--;
2860}
2861
2862
sewardj7aa38a92011-02-27 23:04:12 +00002863/* Look up 'cand' in our collection of VTSs. If present, return the
2864 VtsID for the pre-existing version. If not present, clone it, add
2865 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2866 it, and return that. */
2867static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002868{
sewardj7aa38a92011-02-27 23:04:12 +00002869 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002870 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002871 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2872 tl_assert(in_tab);
2873 if (already_have) {
2874 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002875 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002876 tl_assert(in_tab->id != VtsID_INVALID);
2877 ie = VG_(indexXA)( vts_tab, in_tab->id );
2878 tl_assert(ie->vts == in_tab);
2879 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002880 } else {
2881 VtsID ii = get_new_VtsID();
2882 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002883 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002884 ie->rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002885 ie->u.freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002886 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002887 return ii;
2888 }
2889}
2890
2891
florian6bd9dc12012-11-23 16:17:43 +00002892static void show_vts_stats ( const HChar* caller )
sewardjf98e1c02008-10-25 16:22:41 +00002893{
2894 UWord nSet, nTab, nLive;
2895 ULong totrc;
2896 UWord n, i;
2897 nSet = VG_(sizeFM)( vts_set );
2898 nTab = VG_(sizeXA)( vts_tab );
2899 totrc = 0;
2900 nLive = 0;
2901 n = VG_(sizeXA)( vts_tab );
2902 for (i = 0; i < n; i++) {
2903 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2904 if (ie->vts) {
2905 nLive++;
2906 totrc += (ULong)ie->rc;
2907 } else {
2908 tl_assert(ie->rc == 0);
2909 }
2910 }
2911 VG_(printf)(" show_vts_stats %s\n", caller);
2912 VG_(printf)(" vts_tab size %4lu\n", nTab);
2913 VG_(printf)(" vts_tab live %4lu\n", nLive);
2914 VG_(printf)(" vts_set size %4lu\n", nSet);
2915 VG_(printf)(" total rc %4llu\n", totrc);
2916}
2917
sewardjffce8152011-06-24 10:09:41 +00002918
2919/* --- Helpers for VtsID pruning --- */
2920
2921static
2922void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2923 /*MOD*/XArray* /* of VtsTE */ new_tab,
2924 VtsID* ii )
2925{
2926 VtsTE *old_te, *new_te;
2927 VtsID old_id, new_id;
2928 /* We're relying here on VG_(indexXA)'s range checking to assert on
2929 any stupid values, in particular *ii == VtsID_INVALID. */
2930 old_id = *ii;
2931 old_te = VG_(indexXA)( old_tab, old_id );
2932 old_te->rc--;
philippea1ac2f42015-05-01 17:12:00 +00002933 new_id = old_te->u.remap;
sewardjffce8152011-06-24 10:09:41 +00002934 new_te = VG_(indexXA)( new_tab, new_id );
2935 new_te->rc++;
2936 *ii = new_id;
2937}
2938
2939static
2940void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2941 /*MOD*/XArray* /* of VtsTE */ new_tab,
2942 SVal* s )
2943{
2944 SVal old_sv, new_sv;
2945 old_sv = *s;
2946 if (SVal__isC(old_sv)) {
2947 VtsID rMin, wMin;
2948 rMin = SVal__unC_Rmin(old_sv);
2949 wMin = SVal__unC_Wmin(old_sv);
2950 remap_VtsID( old_tab, new_tab, &rMin );
2951 remap_VtsID( old_tab, new_tab, &wMin );
2952 new_sv = SVal__mkC( rMin, wMin );
2953 *s = new_sv;
2954 }
2955}
2956
2957
sewardjf98e1c02008-10-25 16:22:41 +00002958/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002959__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002960static void vts_tab__do_GC ( Bool show_stats )
2961{
2962 UWord i, nTab, nLive, nFreed;
2963
sewardjffce8152011-06-24 10:09:41 +00002964 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002965 /* check this is actually necessary. */
2966 tl_assert(vts_tab_freelist == VtsID_INVALID);
2967
2968 /* empty the caches for partial order checks and binary joins. We
2969 could do better and prune out the entries to be deleted, but it
2970 ain't worth the hassle. */
2971 VtsID__invalidate_caches();
2972
2973 /* First, make the reference counts up to date. */
2974 zsm_flush_cache();
2975
2976 nTab = VG_(sizeXA)( vts_tab );
2977
2978 if (show_stats) {
2979 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2980 show_vts_stats("before GC");
2981 }
2982
sewardjffce8152011-06-24 10:09:41 +00002983 /* Now we can inspect the entire vts_tab. Any entries with zero
2984 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002985 free list, removed from vts_set, and deleted. */
2986 nFreed = 0;
2987 for (i = 0; i < nTab; i++) {
2988 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002989 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002990 VtsTE* te = VG_(indexXA)( vts_tab, i );
2991 if (te->vts == NULL) {
2992 tl_assert(te->rc == 0);
2993 continue; /* already on the free list (presumably) */
2994 }
2995 if (te->rc > 0)
2996 continue; /* in use */
2997 /* Ok, we got one we can free. */
2998 tl_assert(te->vts->id == i);
2999 /* first, remove it from vts_set. */
3000 present = VG_(delFromFM)( vts_set,
3001 &oldK, &oldV, (UWord)te->vts );
3002 tl_assert(present); /* else it isn't in vts_set ?! */
3003 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3004 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
3005 /* now free the VTS itself */
3006 VTS__delete(te->vts);
3007 te->vts = NULL;
3008 /* and finally put this entry on the free list */
philippea1ac2f42015-05-01 17:12:00 +00003009 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
sewardjf98e1c02008-10-25 16:22:41 +00003010 add_to_free_list( i );
3011 nFreed++;
3012 }
3013
3014 /* Now figure out when the next GC should be. We'll allow the
3015 number of VTSs to double before GCing again. Except of course
3016 that since we can't (or, at least, don't) shrink vts_tab, we
 3017 can't set the threshold value smaller than it. */
3018 tl_assert(nFreed <= nTab);
3019 nLive = nTab - nFreed;
3020 tl_assert(nLive >= 0 && nLive <= nTab);
3021 vts_next_GC_at = 2 * nLive;
3022 if (vts_next_GC_at < nTab)
3023 vts_next_GC_at = nTab;
3024
3025 if (show_stats) {
3026 show_vts_stats("after GC");
3027 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
3028 }
3029
philippe2bd23262015-05-11 20:56:49 +00003030 stats__vts_tab_GC++;
sewardj5e2ac3b2009-08-11 10:39:25 +00003031 if (VG_(clo_stats)) {
sewardjf98e1c02008-10-25 16:22:41 +00003032 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00003033 VG_(message)(Vg_DebugMsg,
philippef54cb662015-05-10 22:19:31 +00003034 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3035 stats__vts_tab_GC,
3036 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00003037 }
sewardjffce8152011-06-24 10:09:41 +00003038 /* ---------- END VTS GC ---------- */
3039
3040 /* Decide whether to do VTS pruning. We have one of three
3041 settings. */
3042 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3043
3044 Bool do_pruning = False;
3045 switch (HG_(clo_vts_pruning)) {
3046 case 0: /* never */
3047 break;
3048 case 1: /* auto */
3049 do_pruning = (++pruning_auto_ctr % 5) == 0;
3050 break;
3051 case 2: /* always */
3052 do_pruning = True;
3053 break;
3054 default:
3055 tl_assert(0);
3056 }
3057
3058 /* The rest of this routine only handles pruning, so we can
3059 quit at this point if it is not to be done. */
3060 if (!do_pruning)
3061 return;
philippec3508652015-03-28 12:01:58 +00003062 /* No need to do pruning if no thread died since the last pruning, as
 3063 no VtsTE can be pruned. */
3064 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3065 return;
sewardjffce8152011-06-24 10:09:41 +00003066
3067 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00003068 /* Sort and check the very dead threads that died since the last pruning.
3069 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00003070 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00003071 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003072
3073 /* We will run through the old table, and create a new table and
philippea1ac2f42015-05-01 17:12:00 +00003074 set, at the same time setting the u.remap entries in the old
sewardjffce8152011-06-24 10:09:41 +00003075 table to point to the new entries. Then, visit every VtsID in
3076 the system, and replace all of them with new ones, using the
philippea1ac2f42015-05-01 17:12:00 +00003077 u.remap entries in the old table. Finally, we can delete the old
sewardjffce8152011-06-24 10:09:41 +00003078 table and set. */
3079
3080 XArray* /* of VtsTE */ new_tab
3081 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3082 HG_(free), sizeof(VtsTE) );
3083
3084 /* WordFM VTS* void */
3085 WordFM* new_set
3086 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3087 HG_(free),
3088 (Word(*)(UWord,UWord))VTS__cmp_structural );
3089
3090 /* Visit each old VTS. For each one:
3091
3092 * make a pruned version
3093
3094 * search new_set for the pruned version, yielding either
3095 Nothing (not present) or the new VtsID for it.
3096
3097 * if not present, allocate a new VtsID for it, insert (pruned
3098 VTS, new VtsID) in the tree, and set
3099 remap_table[old VtsID] = new VtsID.
3100
3101 * if present, set remap_table[old VtsID] = new VtsID, where
3102 new VtsID was determined by the tree lookup. Then free up
3103 the clone.
3104 */
3105
3106 UWord nBeforePruning = 0, nAfterPruning = 0;
3107 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3108 VtsID new_VtsID_ctr = 0;
3109
3110 for (i = 0; i < nTab; i++) {
3111
3112 /* For each old VTS .. */
3113 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3114 VTS* old_vts = old_te->vts;
sewardjffce8152011-06-24 10:09:41 +00003115
3116 /* Skip it if not in use */
3117 if (old_te->rc == 0) {
3118 tl_assert(old_vts == NULL);
3119 continue;
3120 }
philippea1ac2f42015-05-01 17:12:00 +00003121 tl_assert(old_te->u.remap == VtsID_INVALID);
sewardjffce8152011-06-24 10:09:41 +00003122 tl_assert(old_vts != NULL);
3123 tl_assert(old_vts->id == i);
3124 tl_assert(old_vts->ts != NULL);
3125
3126 /* It is in use. Make a pruned version. */
3127 nBeforePruning++;
3128 nSTSsBefore += old_vts->usedTS;
3129 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00003130 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003131 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3132 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3133 == 0x0ddC0ffeeBadF00dULL);
3134
3135 /* Get rid of the old VTS and the tree entry. It's a bit more
3136 complex to incrementally delete the VTSs now than to nuke
3137 them all after we're done, but the upside is that we don't
3138 wind up temporarily storing potentially two complete copies
3139 of each VTS and hence spiking memory use. */
3140 UWord oldK = 0, oldV = 12345;
3141 Bool present = VG_(delFromFM)( vts_set,
3142 &oldK, &oldV, (UWord)old_vts );
3143 tl_assert(present); /* else it isn't in vts_set ?! */
3144 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3145 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3146 /* now free the VTS itself */
3147 VTS__delete(old_vts);
3148 old_te->vts = NULL;
3149 old_vts = NULL;
3150
3151 /* NO MENTIONS of old_vts allowed beyond this point. */
3152
3153 /* Ok, we have the pruned copy in new_vts. See if a
3154 structurally identical version is already present in new_set.
3155 If so, delete the one we just made and move on; if not, add
3156 it. */
3157 VTS* identical_version = NULL;
3158 UWord valW = 12345;
3159 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3160 (UWord)new_vts)) {
3161 // already have it
3162 tl_assert(valW == 0);
3163 tl_assert(identical_version != NULL);
3164 tl_assert(identical_version != new_vts);
3165 VTS__delete(new_vts);
3166 new_vts = identical_version;
3167 tl_assert(new_vts->id != VtsID_INVALID);
3168 } else {
3169 tl_assert(valW == 12345);
3170 tl_assert(identical_version == NULL);
3171 new_vts->id = new_VtsID_ctr++;
3172 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3173 tl_assert(!b);
3174 VtsTE new_te;
3175 new_te.vts = new_vts;
3176 new_te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00003177 new_te.u.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003178 Word j = VG_(addToXA)( new_tab, &new_te );
3179 tl_assert(j <= i);
3180 tl_assert(j == new_VtsID_ctr - 1);
3181 // stats
3182 nAfterPruning++;
3183 nSTSsAfter += new_vts->usedTS;
3184 }
philippea1ac2f42015-05-01 17:12:00 +00003185 old_te->u.remap = new_vts->id;
sewardjffce8152011-06-24 10:09:41 +00003186
3187 } /* for (i = 0; i < nTab; i++) */
3188
philippec3508652015-03-28 12:01:58 +00003189 /* Move very dead threads from verydead_thread_table_not_pruned to
3190 verydead_thread_table. Sort and check verydead_thread_table
3191 to verify a thread was reported very dead only once. */
3192 {
3193 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3194
3195 for (i = 0; i < nBT; i++) {
3196 ThrID thrid =
3197 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3198 VG_(addToXA)( verydead_thread_table, &thrid );
3199 }
3200 verydead_thread_table_sort_and_check (verydead_thread_table);
3201 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3202 }
3203
sewardjffce8152011-06-24 10:09:41 +00003204 /* At this point, we have:
philippea1ac2f42015-05-01 17:12:00 +00003205 * the old VTS table, with its u.remap entries set,
sewardjffce8152011-06-24 10:09:41 +00003206 and with all .vts == NULL.
3207 * the old VTS tree should be empty, since it and the old VTSs
 3208 it contained have been incrementally deleted as we worked
3209 through the old table.
philippea1ac2f42015-05-01 17:12:00 +00003210 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
sewardjffce8152011-06-24 10:09:41 +00003211 == VtsID_INVALID.
3212 * the new VTS tree.
3213 */
3214 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3215
3216 /* Now actually apply the mapping. */
3217 /* Visit all the VtsIDs in the entire system. Where do we expect
3218 to find them?
3219 (a) in shadow memory -- the LineZs and LineFs
3220 (b) in our collection of struct _Thrs.
3221 (c) in our collection of struct _SOs.
3222 Nowhere else, AFAICS. Not in the zsm cache, because that just
3223 got invalidated.
3224
philippea1ac2f42015-05-01 17:12:00 +00003225 Using the u.remap fields in vts_tab, map each old VtsID to a new
sewardjffce8152011-06-24 10:09:41 +00003226 VtsID. For each old VtsID, dec its rc; and for each new one,
3227 inc it. This sets up the new refcounts, and it also gives a
3228 cheap sanity check of the old ones: all old refcounts should be
3229 zero after this operation.
3230 */
3231
3232 /* Do the mappings for (a) above: iterate over the Primary shadow
3233 mem map (WordFM Addr SecMap*). */
3234 UWord secmapW = 0;
3235 VG_(initIterFM)( map_shmem );
3236 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3237 UWord j;
3238 SecMap* sm = (SecMap*)secmapW;
3239 tl_assert(sm->magic == SecMap_MAGIC);
3240 /* Deal with the LineZs */
3241 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3242 LineZ* lineZ = &sm->linesZ[i];
3243 if (lineZ->dict[0] == SVal_INVALID)
3244 continue; /* not in use -- data is in F rep instead */
3245 for (j = 0; j < 4; j++)
3246 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3247 }
3248 /* Deal with the LineFs */
3249 for (i = 0; i < sm->linesF_size; i++) {
3250 LineF* lineF = &sm->linesF[i];
3251 if (!lineF->inUse)
3252 continue;
3253 for (j = 0; j < N_LINE_ARANGE; j++)
3254 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3255 }
3256 }
3257 VG_(doneIterFM)( map_shmem );
3258
3259 /* Do the mappings for (b) above: visit our collection of struct
3260 _Thrs. */
3261 Thread* hgthread = get_admin_threads();
3262 tl_assert(hgthread);
3263 while (hgthread) {
3264 Thr* hbthr = hgthread->hbthr;
3265 tl_assert(hbthr);
3266 /* Threads that are listed in the prunable set have their viR
3267 and viW set to VtsID_INVALID, so we can't mess with them. */
3268 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3269 tl_assert(hbthr->viR == VtsID_INVALID);
3270 tl_assert(hbthr->viW == VtsID_INVALID);
3271 hgthread = hgthread->admin;
3272 continue;
3273 }
3274 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3275 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3276 hgthread = hgthread->admin;
3277 }
3278
3279 /* Do the mappings for (c) above: visit the struct _SOs. */
3280 SO* so = admin_SO;
3281 while (so) {
3282 if (so->viR != VtsID_INVALID)
3283 remap_VtsID( vts_tab, new_tab, &so->viR );
3284 if (so->viW != VtsID_INVALID)
3285 remap_VtsID( vts_tab, new_tab, &so->viW );
3286 so = so->admin_next;
3287 }
3288
3289 /* So, we're nearly done (with this incredibly complex operation).
 3290 Check that the refcounts for the old VtsIDs all fell to zero, as
3291 expected. Any failure is serious. */
3292 for (i = 0; i < nTab; i++) {
3293 VtsTE* te = VG_(indexXA)( vts_tab, i );
3294 tl_assert(te->vts == NULL);
3295 /* This is the assert proper. Note we're also asserting
philippea1ac2f42015-05-01 17:12:00 +00003296 zeroness for old entries which are unmapped. That's OK. */
sewardjffce8152011-06-24 10:09:41 +00003297 tl_assert(te->rc == 0);
3298 }
3299
3300 /* Install the new table and set. */
3301 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3302 vts_set = new_set;
3303 VG_(deleteXA)( vts_tab );
3304 vts_tab = new_tab;
3305
3306 /* The freelist of vts_tab entries is empty now, because we've
3307 compacted all of the live entries at the low end of the
3308 table. */
3309 vts_tab_freelist = VtsID_INVALID;
3310
3311 /* Sanity check vts_set and vts_tab. */
3312
3313 /* Because all the live entries got slid down to the bottom of vts_tab: */
3314 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3315
3316 /* Assert that the vts_tab and vts_set entries point at each other
3317 in the required way */
3318 UWord wordK = 0, wordV = 0;
3319 VG_(initIterFM)( vts_set );
3320 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3321 tl_assert(wordK != 0);
3322 tl_assert(wordV == 0);
3323 VTS* vts = (VTS*)wordK;
3324 tl_assert(vts->id != VtsID_INVALID);
3325 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3326 tl_assert(te->vts == vts);
3327 }
3328 VG_(doneIterFM)( vts_set );
3329
3330 /* Also iterate over the table, and check each entry is
3331 plausible. */
3332 nTab = VG_(sizeXA)( vts_tab );
3333 for (i = 0; i < nTab; i++) {
3334 VtsTE* te = VG_(indexXA)( vts_tab, i );
3335 tl_assert(te->vts);
3336 tl_assert(te->vts->id == i);
3337 tl_assert(te->rc > 0); /* 'cos we just GC'd */
philippea1ac2f42015-05-01 17:12:00 +00003338 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3339 /* value of te->u.remap not relevant */
sewardjffce8152011-06-24 10:09:41 +00003340 }
3341
3342 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
philippe2bd23262015-05-11 20:56:49 +00003343 stats__vts_pruning++;
sewardjffce8152011-06-24 10:09:41 +00003344 if (VG_(clo_stats)) {
sewardjffce8152011-06-24 10:09:41 +00003345 tl_assert(nTab > 0);
3346 VG_(message)(
3347 Vg_DebugMsg,
philippe2bd23262015-05-11 20:56:49 +00003348 "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
sewardjffce8152011-06-24 10:09:41 +00003349 "after %lu (avg sz %lu)\n",
philippe2bd23262015-05-11 20:56:49 +00003350 stats__vts_pruning,
sewardjffce8152011-06-24 10:09:41 +00003351 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3352 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3353 );
3354 }
sewardjffce8152011-06-24 10:09:41 +00003355 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003356}
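
/* A worked example of the pruning pass above (illustrative only; the
   thread names and clock values are made up). Suppose t3 has been
   reported very dead since the last pruning, and vts_tab holds
   [t1:4, t3:9] and [t1:4, t3:7]. VTS__subtract drops the t3 element
   from each, producing two structurally identical copies of [t1:4];
   the new_set lookup collapses them onto a single new VtsID, both old
   entries' u.remap fields are pointed at it, and the remap pass then
   rewrites every VtsID field in the system accordingly. */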
3357
3358
3359/////////////////////////////////////////////////////////
3360// //
3361// Vts IDs //
3362// //
3363/////////////////////////////////////////////////////////
3364
3365//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003366/* A max-sized scratch VTS which is used as the temporary (the first
3367 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3368static VTS* temp_max_sized_VTS = NULL;
3369
3370//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003371static ULong stats__cmpLEQ_queries = 0;
3372static ULong stats__cmpLEQ_misses = 0;
3373static ULong stats__join2_queries = 0;
3374static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003375
3376static inline UInt ROL32 ( UInt w, Int n ) {
3377 w = (w << n) | (w >> (32-n));
3378 return w;
3379}
3380static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3381 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3382 return hash % nTab;
3383}
3384
sewardj23f12002009-07-24 08:45:08 +00003385#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003386static
sewardj23f12002009-07-24 08:45:08 +00003387 struct { VtsID vi1; VtsID vi2; Bool leq; }
3388 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003389
3390#define N_JOIN2_CACHE 1023
3391static
3392 struct { VtsID vi1; VtsID vi2; VtsID res; }
3393 join2_cache[N_JOIN2_CACHE];
3394
3395static void VtsID__invalidate_caches ( void ) {
3396 Int i;
sewardj23f12002009-07-24 08:45:08 +00003397 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3398 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3399 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3400 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003401 }
3402 for (i = 0; i < N_JOIN2_CACHE; i++) {
3403 join2_cache[i].vi1 = VtsID_INVALID;
3404 join2_cache[i].vi2 = VtsID_INVALID;
3405 join2_cache[i].res = VtsID_INVALID;
3406 }
3407}
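
/* An illustrative sketch, kept out of the build, of the direct-mapped
   probe pattern both caches above rely on: a slot either holds exactly
   the queried pair, or the query falls through to the slow path, which
   then overwrites the slot. The helper name is hypothetical. */
#if 0
static Bool example_cmpLEQ_cache_probe ( VtsID vi1, VtsID vi2,
                                         /*OUT*/Bool* leq )
{
   UInt h = hash_VtsIDs( vi1, vi2, N_CMPLEQ_CACHE );
   if (cmpLEQ_cache[h].vi1 == vi1 && cmpLEQ_cache[h].vi2 == vi2) {
      *leq = cmpLEQ_cache[h].leq;  /* hit */
      return True;
   }
   return False;                   /* miss: caller must recompute */
}
#endif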
3408//////////////////////////
3409
sewardjd52392d2008-11-08 20:36:26 +00003410//static Bool VtsID__is_valid ( VtsID vi ) {
3411// VtsTE* ve;
3412// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3413// return False;
3414// ve = VG_(indexXA)( vts_tab, vi );
3415// if (!ve->vts)
3416// return False;
3417// tl_assert(ve->vts->id == vi);
3418// return True;
3419//}
sewardjf98e1c02008-10-25 16:22:41 +00003420
3421static VTS* VtsID__to_VTS ( VtsID vi ) {
3422 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3423 tl_assert(te->vts);
3424 return te->vts;
3425}
3426
3427static void VtsID__pp ( VtsID vi ) {
sewardjf98e1c02008-10-25 16:22:41 +00003428 VTS* vts = VtsID__to_VTS(vi);
florianb28fe892014-10-28 20:52:07 +00003429 VTS__show( vts );
sewardjf98e1c02008-10-25 16:22:41 +00003430}
3431
3432/* compute partial ordering relation of vi1 and vi2. */
3433__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003434static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003435 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003436 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003437 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003438 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003439 tl_assert(vi1 != vi2);
3440 ////++
sewardj23f12002009-07-24 08:45:08 +00003441 stats__cmpLEQ_queries++;
3442 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3443 if (cmpLEQ_cache[hash].vi1 == vi1
3444 && cmpLEQ_cache[hash].vi2 == vi2)
3445 return cmpLEQ_cache[hash].leq;
3446 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003447 ////--
3448 v1 = VtsID__to_VTS(vi1);
3449 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003450 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003451 ////++
sewardj23f12002009-07-24 08:45:08 +00003452 cmpLEQ_cache[hash].vi1 = vi1;
3453 cmpLEQ_cache[hash].vi2 = vi2;
3454 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003455 ////--
sewardj23f12002009-07-24 08:45:08 +00003456 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003457}
sewardj23f12002009-07-24 08:45:08 +00003458static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3459 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003460}
3461
3462/* compute binary join */
3463__attribute__((noinline))
3464static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3465 UInt hash;
3466 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003467 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003468 //if (vi1 == vi2) return vi1;
3469 tl_assert(vi1 != vi2);
3470 ////++
3471 stats__join2_queries++;
3472 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3473 if (join2_cache[hash].vi1 == vi1
3474 && join2_cache[hash].vi2 == vi2)
3475 return join2_cache[hash].res;
3476 stats__join2_misses++;
3477 ////--
3478 vts1 = VtsID__to_VTS(vi1);
3479 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003480 temp_max_sized_VTS->usedTS = 0;
3481 VTS__join(temp_max_sized_VTS, vts1,vts2);
3482 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003483 ////++
3484 join2_cache[hash].vi1 = vi1;
3485 join2_cache[hash].vi2 = vi2;
3486 join2_cache[hash].res = res;
3487 ////--
3488 return res;
3489}
3490static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003491 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003492}
3493
3494/* create a singleton VTS, namely [thr:1] */
3495static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003496 temp_max_sized_VTS->usedTS = 0;
3497 VTS__singleton(temp_max_sized_VTS, thr,tym);
3498 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003499}
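
/* An illustrative sketch, kept out of the build, of the interning
   guarantee provided by vts_tab__find__or__clone_and_add and hence by
   the wrapper above: structurally equal VTSs always intern to the same
   VtsID, so VtsID equality can stand in for structural equality. The
   driver function itself is hypothetical. */
#if 0
static void example_intern_invariant ( Thr* thr )
{
   VtsID i1 = VtsID__mk_Singleton( thr, 1 );
   VtsID i2 = VtsID__mk_Singleton( thr, 1 );
   tl_assert(i1 == i2); /* same structure, same interned VtsID */
}
#endif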
3500
3501/* tick operation, creates value 1 if specified index is absent */
3502static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3503 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003504 temp_max_sized_VTS->usedTS = 0;
3505 VTS__tick(temp_max_sized_VTS, idx,vts);
3506 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003507}
3508
3509/* index into a VTS (only for assertions) */
3510static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3511 VTS* vts = VtsID__to_VTS(vi);
3512 return VTS__indexAt_SLOW( vts, idx );
3513}
3514
sewardj23f12002009-07-24 08:45:08 +00003515/* Assuming that !cmpLEQ(vi1, vi2), find the thread owning the first (or
 3516 any, really) element in vi1 which is pointwise greater-than the
 3517 corresponding element in vi2. If no such thread exists, return
3518 NULL. This needs to be fairly quick since it is called every time
3519 a race is detected. */
3520static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3521{
3522 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003523 Thr* diffthr;
3524 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003525 tl_assert(vi1 != vi2);
3526 vts1 = VtsID__to_VTS(vi1);
3527 vts2 = VtsID__to_VTS(vi2);
3528 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003529 diffthrid = VTS__cmpLEQ(vts1, vts2);
3530 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003531 tl_assert(diffthr); /* else they are LEQ ! */
3532 return diffthr;
3533}
3534
3535
3536/////////////////////////////////////////////////////////
3537// //
3538// Filters //
3539// //
3540/////////////////////////////////////////////////////////
3541
sewardj23f12002009-07-24 08:45:08 +00003542/* Forget everything we know -- clear the filter and let everything
3543 through. This needs to be as fast as possible, since it is called
3544 every time the running thread changes, and every time a thread's
3545 vector clocks change, which can be quite frequent. The obvious
3546 fast way to do this is simply to stuff in tags which we know are
3547 not going to match anything, since they're not aligned to the start
3548 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003549static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003550{
3551 UWord i;
3552 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3553 for (i = 0; i < FI_NUM_LINES; i += 8) {
3554 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3555 fi->tags[i+1] = 1;
3556 fi->tags[i+2] = 1;
3557 fi->tags[i+3] = 1;
3558 fi->tags[i+4] = 1;
3559 fi->tags[i+5] = 1;
3560 fi->tags[i+6] = 1;
3561 fi->tags[i+7] = 1;
3562 }
3563 tl_assert(i == FI_NUM_LINES);
3564}
3565
3566/* Clearing an arbitrary range in the filter. Unfortunately
3567 we have to do this due to core-supplied new/die-mem events. */
3568
3569static void Filter__clear_1byte ( Filter* fi, Addr a )
3570{
3571 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3572 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3573 FiLine* line = &fi->lines[lineno];
3574 UWord loff = (a - atag) / 8;
3575 UShort mask = 0x3 << (2 * (a & 7));
3576 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3577 if (LIKELY( fi->tags[lineno] == atag )) {
3578 /* hit. clear the bits. */
3579 UShort u16 = line->u16s[loff];
3580 line->u16s[loff] = u16 & ~mask; /* clear them */
3581 } else {
3582 /* miss. The filter doesn't hold this address, so ignore. */
3583 }
3584}
3585
3586static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3587{
3588 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3589 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3590 FiLine* line = &fi->lines[lineno];
3591 UWord loff = (a - atag) / 8;
3592 if (LIKELY( fi->tags[lineno] == atag )) {
3593 line->u16s[loff] = 0;
3594 } else {
3595 /* miss. The filter doesn't hold this address, so ignore. */
3596 }
3597}
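
/* An illustrative sketch, kept out of the build, which enumerates the
   2-bits-per-byte masks used by Filter__clear_1byte above, making the
   "C000, 3000, 0C00, ..." comment concrete. Byte k of an 8-byte chunk
   owns bits [2k+1:2k] of the corresponding u16. The function name is
   hypothetical. */
#if 0
static void example_filter_byte_masks ( void )
{
   UWord k;
   for (k = 0; k < 8; k++) {
      UShort mask = 0x3 << (2 * k);
      VG_(printf)( "byte %lu -> mask %04x\n", k, (UInt)mask );
   }
   /* prints 0003, 000C, 0030, 00C0, 0300, 0C00, 3000, C000 */
}
#endif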
3598
philippefc00a2a2015-05-15 11:41:54 +00003599/* Only used to verify the fast Filter__clear_range */
3600__attribute__((unused))
3601static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
sewardj23f12002009-07-24 08:45:08 +00003602{
philippefc00a2a2015-05-15 11:41:54 +00003603 tl_assert (CHECK_ZSM);
3604
sewardj23f12002009-07-24 08:45:08 +00003605 /* slowly do part preceding 8-alignment */
3606 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3607 Filter__clear_1byte( fi, a );
3608 a++;
3609 len--;
3610 }
3611 /* vector loop */
3612 while (len >= 8) {
3613 Filter__clear_8bytes_aligned( fi, a );
3614 a += 8;
3615 len -= 8;
3616 }
3617 /* slowly do tail */
3618 while (UNLIKELY(len > 0)) {
3619 Filter__clear_1byte( fi, a );
3620 a++;
3621 len--;
3622 }
3623}
3624
philippefc00a2a2015-05-15 11:41:54 +00003625static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3626{
3627# if CHECK_ZSM > 0
 3628 /* We cross-check the more complex algorithm below against the simple one.
 3629 This check is very expensive: we first do the clearing the slow way on a
 3630 copy of the data, then do it the fast way. On RETURN, we check
 3631 that the two results are equal. */
3632 Filter fi_check = *fi;
3633 Filter__clear_range_SLOW(&fi_check, a, len);
3634# define RETURN goto check_and_return
3635# else
3636# define RETURN return
3637# endif
3638
3639 Addr begtag = FI_GET_TAG(a); /* tag of range begin */
3640
3641 Addr end = a + len - 1;
3642 Addr endtag = FI_GET_TAG(end); /* tag of range end. */
3643
3644 UWord rlen = len; /* remaining length to clear */
3645
3646 Addr c = a; /* Current position we are clearing. */
3647 UWord clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
3648 FiLine* cline; /* Current line we are clearing */
3649 UWord cloff; /* Current offset in line we are clearing, when clearing
3650 partial lines. */
3651
3652 UShort u16;
3653
3654 STATIC_ASSERT (FI_LINE_SZB == 32);
3655 // Below assumes filter lines are 32 bytes
3656
3657 if (LIKELY(fi->tags[clineno] == begtag)) {
3658 /* LIKELY for the heavy caller VG_(unknown_SP_update). */
3659 /* First filter line matches begtag.
 3660 If c is not at the start of the filter line, the code below clears
 3661 the filter line bytes starting from c. */
3662 cline = &fi->lines[clineno];
3663 cloff = (c - begtag) / 8;
3664
3665 /* First the byte(s) needed to reach 8-alignment */
3666 if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
3667 /* hiB is the nr of bytes (higher addresses) from c to reach
 3668 8-alignment. */
3669 UWord hiB = 8 - (c & 7);
3670 /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
 3671 mask is C000, F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
3672 range 7..7 6..7 5..7 4..7 3..7 2..7 1..7 */
3673 UShort mask = 0xFFFF << (16 - 2*hiB);
3674
3675 u16 = cline->u16s[cloff];
3676 if (LIKELY(rlen >= hiB)) {
3677 cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
3678 rlen -= hiB;
3679 c += hiB;
3680 cloff += 1;
3681 } else {
 3682 /* Only have the bits for rlen bytes. */
3683 mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
3684 cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
 3685 RETURN; // We have cleared all that we can.
3686 }
3687 }
3688 /* c is now 8 aligned. Clear by 8 aligned bytes,
3689 till c is filter-line aligned */
3690 while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
3691 cline->u16s[cloff] = 0;
3692 c += 8;
3693 rlen -= 8;
3694 cloff += 1;
3695 }
3696 } else {
3697 c = begtag + FI_LINE_SZB;
3698 if (c > end)
 3699 RETURN; // We have cleared all that we can.
3700 rlen -= c - a;
3701 }
3702 // We have changed c, so re-establish clineno.
3703 clineno = FI_GET_LINENO(c);
3704
3705 if (rlen >= FI_LINE_SZB) {
 3706 /* Here, c is filter line-aligned. Clear all the full lines
 3707 contained in the range starting at c. */
3708 UWord nfull = rlen / FI_LINE_SZB;
3709 UWord full_len = nfull * FI_LINE_SZB;
3710 rlen -= full_len;
3711 if (nfull > FI_NUM_LINES)
 3712 nfull = FI_NUM_LINES; // no need to check the same entry several times.
3713
3714 for (UWord n = 0; n < nfull; n++) {
3715 if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
3716 cline = &fi->lines[clineno];
3717 cline->u16s[0] = 0;
3718 cline->u16s[1] = 0;
3719 cline->u16s[2] = 0;
3720 cline->u16s[3] = 0;
3721 STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
3722 }
3723 clineno++;
3724 if (UNLIKELY(clineno == FI_NUM_LINES))
3725 clineno = 0;
3726 }
3727
3728 c += full_len;
3729 clineno = FI_GET_LINENO(c);
3730 }
3731
3732 if (CHECK_ZSM) {
3733 tl_assert(VG_IS_8_ALIGNED(c));
3734 tl_assert(clineno == FI_GET_LINENO(c));
3735 }
3736
3737 /* Do the last filter line, if it was not cleared as a full filter line */
3738 if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
3739 cline = &fi->lines[clineno];
3740 cloff = (c - endtag) / 8;
3741 if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);
3742
3743 /* c is 8 aligned. Clear by 8 aligned bytes, till we have less than
3744 8 bytes. */
3745 while (rlen >= 8) {
3746 cline->u16s[cloff] = 0;
3747 c += 8;
3748 rlen -= 8;
3749 cloff += 1;
3750 }
3751 /* Then the remaining byte(s) */
3752 if (rlen > 0) {
3753 /* nr of bytes from c to reach end. */
3754 UWord loB = rlen;
3755 /* Compute mask representing loB bytes [c..c+loB[ :
3756 mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
3757 UShort mask = 0xFFFF >> (16 - 2*loB);
3758
3759 u16 = cline->u16s[cloff];
3760 cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
3761 }
3762 }
3763
3764# if CHECK_ZSM > 0
3765 check_and_return:
3766 tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
3767# endif
3768# undef RETURN
3769}
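
/* An illustrative sketch, kept out of the build, exercising
   Filter__clear_range against the slow reference version, in the same
   spirit as the CHECK_ZSM self-test built into the function above.
   It assumes a CHECK_ZSM build (Filter__clear_range_SLOW asserts
   that); the address and length are arbitrary. */
#if 0
static void example_clear_range_crosscheck ( Filter* fi )
{
   Filter ref = *fi;                           /* copy for the oracle */
   Filter__clear_range_SLOW( &ref, 0x5007, 45 );
   Filter__clear_range( fi, 0x5007, 45 );
   tl_assert(VG_(memcmp)( &ref, fi, sizeof(ref) ) == 0);
}
#endif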
sewardj23f12002009-07-24 08:45:08 +00003770
3771/* ------ Read handlers for the filter. ------ */
3772
3773static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3774{
3775 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3776 return False;
3777 {
3778 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3779 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3780 FiLine* line = &fi->lines[lineno];
3781 UWord loff = (a - atag) / 8;
3782 UShort mask = 0xAAAA;
3783 if (LIKELY( fi->tags[lineno] == atag )) {
3784 /* hit. check line and update. */
3785 UShort u16 = line->u16s[loff];
3786 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3787 line->u16s[loff] = u16 | mask; /* set them */
3788 return ok;
3789 } else {
3790 /* miss. nuke existing line and re-use it. */
3791 UWord i;
3792 fi->tags[lineno] = atag;
3793 for (i = 0; i < FI_LINE_SZB / 8; i++)
3794 line->u16s[i] = 0;
3795 line->u16s[loff] = mask;
3796 return False;
3797 }
3798 }
3799}
3800
3801static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3802{
3803 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3804 return False;
3805 {
3806 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3807 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3808 FiLine* line = &fi->lines[lineno];
3809 UWord loff = (a - atag) / 8;
3810 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3811 if (LIKELY( fi->tags[lineno] == atag )) {
3812 /* hit. check line and update. */
3813 UShort u16 = line->u16s[loff];
3814 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3815 line->u16s[loff] = u16 | mask; /* set them */
3816 return ok;
3817 } else {
3818 /* miss. nuke existing line and re-use it. */
3819 UWord i;
3820 fi->tags[lineno] = atag;
3821 for (i = 0; i < FI_LINE_SZB / 8; i++)
3822 line->u16s[i] = 0;
3823 line->u16s[loff] = mask;
3824 return False;
3825 }
3826 }
3827}
3828
3829static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3830{
3831 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3832 return False;
3833 {
3834 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3835 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3836 FiLine* line = &fi->lines[lineno];
3837 UWord loff = (a - atag) / 8;
3838 UShort mask = 0xA << (2 * (a & 6));
3839 /* mask is A000, 0A00, 00A0 or 000A */
3840 if (LIKELY( fi->tags[lineno] == atag )) {
3841 /* hit. check line and update. */
3842 UShort u16 = line->u16s[loff];
3843 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3844 line->u16s[loff] = u16 | mask; /* set them */
3845 return ok;
3846 } else {
3847 /* miss. nuke existing line and re-use it. */
3848 UWord i;
3849 fi->tags[lineno] = atag;
3850 for (i = 0; i < FI_LINE_SZB / 8; i++)
3851 line->u16s[i] = 0;
3852 line->u16s[loff] = mask;
3853 return False;
3854 }
3855 }
3856}
3857
3858static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3859{
3860 {
3861 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3862 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3863 FiLine* line = &fi->lines[lineno];
3864 UWord loff = (a - atag) / 8;
3865 UShort mask = 0x2 << (2 * (a & 7));
3866 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3867 if (LIKELY( fi->tags[lineno] == atag )) {
3868 /* hit. check line and update. */
3869 UShort u16 = line->u16s[loff];
 3870 Bool ok = (u16 & mask) == mask; /* 1 x R bit set? */
3871 line->u16s[loff] = u16 | mask; /* set them */
3872 return ok;
3873 } else {
3874 /* miss. nuke existing line and re-use it. */
3875 UWord i;
3876 fi->tags[lineno] = atag;
3877 for (i = 0; i < FI_LINE_SZB / 8; i++)
3878 line->u16s[i] = 0;
3879 line->u16s[loff] = mask;
3880 return False;
3881 }
3882 }
3883}
3884
3885
3886/* ------ Write handlers for the filter. ------ */
3887
3888static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3889{
3890 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3891 return False;
3892 {
3893 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3894 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3895 FiLine* line = &fi->lines[lineno];
3896 UWord loff = (a - atag) / 8;
3897 UShort mask = 0xFFFF;
3898 if (LIKELY( fi->tags[lineno] == atag )) {
3899 /* hit. check line and update. */
3900 UShort u16 = line->u16s[loff];
3901 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3902 line->u16s[loff] = u16 | mask; /* set them */
3903 return ok;
3904 } else {
3905 /* miss. nuke existing line and re-use it. */
3906 UWord i;
3907 fi->tags[lineno] = atag;
3908 for (i = 0; i < FI_LINE_SZB / 8; i++)
3909 line->u16s[i] = 0;
3910 line->u16s[loff] = mask;
3911 return False;
3912 }
3913 }
3914}
3915
3916static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3917{
3918 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3919 return False;
3920 {
3921 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3922 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3923 FiLine* line = &fi->lines[lineno];
3924 UWord loff = (a - atag) / 8;
3925 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3926 if (LIKELY( fi->tags[lineno] == atag )) {
3927 /* hit. check line and update. */
3928 UShort u16 = line->u16s[loff];
3929 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3930 line->u16s[loff] = u16 | mask; /* set them */
3931 return ok;
3932 } else {
3933 /* miss. nuke existing line and re-use it. */
3934 UWord i;
3935 fi->tags[lineno] = atag;
3936 for (i = 0; i < FI_LINE_SZB / 8; i++)
3937 line->u16s[i] = 0;
3938 line->u16s[loff] = mask;
3939 return False;
3940 }
3941 }
3942}
3943
3944static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3945{
3946 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3947 return False;
3948 {
3949 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3950 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3951 FiLine* line = &fi->lines[lineno];
3952 UWord loff = (a - atag) / 8;
3953 UShort mask = 0xF << (2 * (a & 6));
3954 /* mask is F000, 0F00, 00F0 or 000F */
3955 if (LIKELY( fi->tags[lineno] == atag )) {
3956 /* hit. check line and update. */
3957 UShort u16 = line->u16s[loff];
3958 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3959 line->u16s[loff] = u16 | mask; /* set them */
3960 return ok;
3961 } else {
3962 /* miss. nuke existing line and re-use it. */
3963 UWord i;
3964 fi->tags[lineno] = atag;
3965 for (i = 0; i < FI_LINE_SZB / 8; i++)
3966 line->u16s[i] = 0;
3967 line->u16s[loff] = mask;
3968 return False;
3969 }
3970 }
3971}
3972
3973static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3974{
3975 {
3976 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3977 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3978 FiLine* line = &fi->lines[lineno];
3979 UWord loff = (a - atag) / 8;
3980 UShort mask = 0x3 << (2 * (a & 7));
3981 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3982 if (LIKELY( fi->tags[lineno] == atag )) {
3983 /* hit. check line and update. */
3984 UShort u16 = line->u16s[loff];
 3985 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3986 line->u16s[loff] = u16 | mask; /* set them */
3987 return ok;
3988 } else {
3989 /* miss. nuke existing line and re-use it. */
3990 UWord i;
3991 fi->tags[lineno] = atag;
3992 for (i = 0; i < FI_LINE_SZB / 8; i++)
3993 line->u16s[i] = 0;
3994 line->u16s[loff] = mask;
3995 return False;
3996 }
3997 }
3998}
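
/* An illustrative sketch, kept out of the build, of the per-byte bit
   layout all eight handlers above rely on: each byte of an 8-byte
   chunk owns a 2-bit field in the u16, with R as the high bit of the
   pair (hence the 0xAAAA/0x2 read masks) and W as the low bit, so the
   write handlers set both with 0xFFFF/0x3-style masks. The function
   name is hypothetical. */
#if 0
static void example_filter_rw_bits ( void )
{
   UWord byteno;
   for (byteno = 0; byteno < 8; byteno++) {
      UShort rd = 0x2 << (2 * byteno);  /* crd08's mask for this byte */
      UShort wr = 0x3 << (2 * byteno);  /* cwr08's mask for this byte */
      tl_assert((wr & rd) == rd);       /* a write subsumes a read */
   }
}
#endif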
3999
sewardjf98e1c02008-10-25 16:22:41 +00004000
4001/////////////////////////////////////////////////////////
4002// //
4003// Threads //
4004// //
4005/////////////////////////////////////////////////////////
4006
sewardje4cce742011-02-24 15:25:24 +00004007/* Maps ThrID values to their Thr*s (which contain ThrID values that
 4008 should point back to the relevant slot in the array). Lowest
4009 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
4010static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
4011
4012/* And a counter to dole out ThrID values. For rationale/background,
4013 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00004014static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00004015
4016static ThrID Thr__to_ThrID ( Thr* thr ) {
4017 return thr->thrid;
4018}
4019static Thr* Thr__from_ThrID ( UInt thrid ) {
4020 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
4021 tl_assert(thr->thrid == thrid);
4022 return thr;
4023}
4024
4025static Thr* Thr__new ( void )
4026{
sewardjf98e1c02008-10-25 16:22:41 +00004027 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
4028 thr->viR = VtsID_INVALID;
4029 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00004030 thr->llexit_done = False;
4031 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00004032 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
philippeca903bb2014-04-26 22:50:08 +00004033 if (HG_(clo_history_level) == 1)
4034 thr->local_Kws_n_stacks
4035 = VG_(newXA)( HG_(zalloc),
4036 "libhb.Thr__new.3 (local_Kws_and_stacks)",
4037 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00004038
4039 /* Add this Thr* <-> ThrID binding to the mapping, and
4040 cross-check */
4041 if (!thrid_to_thr_map) {
4042 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
4043 HG_(free), sizeof(Thr*) );
sewardje4cce742011-02-24 15:25:24 +00004044 }
4045
sewardj7aa38a92011-02-27 23:04:12 +00004046 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00004047 /* We're hosed. We have to stop. */
4048 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
4049 }
4050
4051 thr->thrid = thrid_counter++;
4052 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
4053 tl_assert(ix + 1024 == thr->thrid);
4054
sewardjf98e1c02008-10-25 16:22:41 +00004055 return thr;
4056}
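
/* An illustrative sketch, kept out of the build, of the Thr* <-> ThrID
   round trip that Thr__new establishes: ThrIDs start at 1024, so slot
   0 of thrid_to_thr_map is thrid 1024. The driver function is
   hypothetical and only states the invariant. */
#if 0
static void example_thrid_roundtrip ( void )
{
   Thr* thr = Thr__new();
   tl_assert(thr->thrid >= 1024);
   tl_assert(Thr__from_ThrID( Thr__to_ThrID(thr) ) == thr);
}
#endif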
4057
sewardj8ab2c132009-08-02 09:34:35 +00004058static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00004059{
4060 Word nPresent;
4061 ULong_n_EC pair;
4062 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00004063
4064 // We only collect this info at history level 1 (approx)
4065 if (HG_(clo_history_level) != 1)
4066 return;
4067
sewardj8ab2c132009-08-02 09:34:35 +00004068 /* This is the scalar Kw for thr. */
4069 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00004070 pair.ec = main_get_EC( thr );
4071 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00004072 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004073
4074 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00004075 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00004076
4077 /* Throw away old stacks, if necessary. We can't accumulate stuff
4078 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00004079 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
4080 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
4081 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
4082 if (0)
4083 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00004084 thr, pair.ull, pair.ec );
4085 }
4086
4087 if (nPresent > 0) {
4088 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00004089 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
4090 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00004091 }
4092
4093 if (nPresent == 0)
4094 pair.ec = NULL;
4095
sewardj8ab2c132009-08-02 09:34:35 +00004096 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00004097
4098 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00004099 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00004100 thr, pair.ull, pair.ec );
4101 if (0)
4102 VG_(pp_ExeContext)(pair.ec);
4103}
4104
florian6bd9dc12012-11-23 16:17:43 +00004105static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
4106 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00004107{
4108 if (pair1->ull < pair2->ull) return -1;
4109 if (pair1->ull > pair2->ull) return 1;
4110 return 0;
4111}
4112
sewardjf98e1c02008-10-25 16:22:41 +00004113
4114/////////////////////////////////////////////////////////
4115// //
4116// Shadow Values //
4117// //
4118/////////////////////////////////////////////////////////
4119
4120// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
4121// hb_zsm.h. We have to do everything else here.
4122
4123/* SVal is 64 bit unsigned int.
4124
4125 <---------30---------> <---------30--------->
4126 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00004127 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00004128 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
4129
sewardjf98e1c02008-10-25 16:22:41 +00004130*/
4131#define SVAL_TAGMASK (3ULL << 62)
4132
4133static inline Bool SVal__isC ( SVal s ) {
4134 return (0ULL << 62) == (s & SVAL_TAGMASK);
4135}
4136static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
4137 //tl_assert(VtsID__is_valid(rmini));
4138 //tl_assert(VtsID__is_valid(wmini));
4139 return (((ULong)rmini) << 32) | ((ULong)wmini);
4140}
4141static inline VtsID SVal__unC_Rmin ( SVal s ) {
4142 tl_assert(SVal__isC(s));
4143 return (VtsID)(s >> 32);
4144}
4145static inline VtsID SVal__unC_Wmin ( SVal s ) {
4146 tl_assert(SVal__isC(s));
4147 return (VtsID)(s & 0xFFFFFFFFULL);
4148}
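
/* An illustrative sketch, kept out of the build, of the C-tag
   pack/unpack round trip defined above. The example VtsIDs are
   arbitrary values below 2^30, so the top two (tag) bits of the
   packed SVal stay 00 and SVal__isC holds. */
#if 0
static void example_sval_roundtrip ( void )
{
   VtsID r = 123, w = 456;
   SVal  s = SVal__mkC( r, w );
   tl_assert(SVal__isC(s));
   tl_assert(SVal__unC_Rmin(s) == r);
   tl_assert(SVal__unC_Wmin(s) == w);
}
#endif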
4149
sewardj23f12002009-07-24 08:45:08 +00004150static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004151 return (2ULL << 62) == (s & SVAL_TAGMASK);
4152}
sewardj5aa09bf2014-06-20 14:25:53 +00004153__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00004154static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00004155 return 2ULL << 62;
4156}
4157
4158/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004159static inline void SVal__rcinc ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004160 if (SVal__isC(s)) {
4161 VtsID__rcinc( SVal__unC_Rmin(s) );
4162 VtsID__rcinc( SVal__unC_Wmin(s) );
4163 }
4164}
4165
4166/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004167static inline void SVal__rcdec ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004168 if (SVal__isC(s)) {
4169 VtsID__rcdec( SVal__unC_Rmin(s) );
4170 VtsID__rcdec( SVal__unC_Wmin(s) );
4171 }
4172}
4173
4174
4175/////////////////////////////////////////////////////////
4176// //
4177// Change-event map2 //
4178// //
4179/////////////////////////////////////////////////////////
4180
sewardjf98e1c02008-10-25 16:22:41 +00004181/* This is in two parts:
4182
sewardj23f12002009-07-24 08:45:08 +00004183 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00004184 traces. When the reference count of a stack trace becomes zero,
4185 it is removed from the set and freed up. The intent is to have
4186 a set of stack traces which can be referred to from (2), but to
4187 only represent each one once. The set is indexed/searched by
4188 ordering on the stack trace vectors.
4189
sewardj849b0ed2008-12-21 10:43:10 +00004190 2. A SparseWA of OldRefs. These store information about each old
4191 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00004192 location for which the information is recorded. For LRU
philippecabdbb52015-04-20 21:33:16 +00004193 purposes, each OldRef in the SparseWA is also on a doubly
 4194 linked list maintaining the order in which the OldRefs were most
4195 recently accessed.
sewardjf98e1c02008-10-25 16:22:41 +00004196
4197 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00004198 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
4199 size) triples to RCECs. This allows us to collect the last
4200 access-traceback by up to N_OLDREF_ACCS different triples for
 4201 this location. The accs[] array is an MTF-array. If a binding
4202 falls off the end, that's too bad -- we will lose info about
4203 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00004204
philippecabdbb52015-04-20 21:33:16 +00004205 We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
4206 Then we do exact LRU discarding. For each discarded OldRef we must
sewardjf98e1c02008-10-25 16:22:41 +00004207 of course decrement the reference count on the all RCECs it
4208 refers to, in order that entries from (1) eventually get
4209 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00004210
4211 A major improvement in reliability of this mechanism would be to
4212 have a dynamically sized OldRef.accs[] array, so no entries ever
4213 fall off the end. In investigations (Dec 08) it appears that a
4214 major cause for the non-availability of conflicting-access traces
4215 in race reports is caused by the fixed size of this array. I
4216 suspect for most OldRefs, only a few entries are used, but for a
4217 minority of cases there is an overflow, leading to info lossage.
4218 Investigations also suggest this is very workload and scheduling
4219 sensitive. Therefore a dynamic sizing would be better.
4220
philippe6643e962012-01-17 21:16:30 +00004221 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00004222 for OldRef structures. And that's important for performance. So
4223 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00004224*/
4225
4226
4227static UWord stats__ctxt_rcdec1 = 0;
4228static UWord stats__ctxt_rcdec2 = 0;
4229static UWord stats__ctxt_rcdec3 = 0;
4230static UWord stats__ctxt_rcdec_calls = 0;
4231static UWord stats__ctxt_rcdec_discards = 0;
4232static UWord stats__ctxt_rcdec1_eq = 0;
4233
4234static UWord stats__ctxt_tab_curr = 0;
4235static UWord stats__ctxt_tab_max = 0;
4236
4237static UWord stats__ctxt_tab_qs = 0;
4238static UWord stats__ctxt_tab_cmps = 0;
4239
4240
4241///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00004242//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00004243///
4244
4245#define N_FRAMES 8
4246
4247// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4248#define RCEC_MAGIC 0xab88abb2UL
4249
4250//#define N_RCEC_TAB 98317 /* prime */
4251#define N_RCEC_TAB 196613 /* prime */
4252
4253typedef
4254 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00004255 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004256 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004257 UWord rc;
4258 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004259 UWord frames_hash; /* hash of all the frames */
4260 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004261 }
4262 RCEC;
4263
philippecabdbb52015-04-20 21:33:16 +00004264//////////// BEGIN RCEC pool allocator
4265static PoolAlloc* rcec_pool_allocator;
4266static RCEC* alloc_RCEC ( void ) {
4267 return VG_(allocEltPA) ( rcec_pool_allocator );
4268}
4269
4270static void free_RCEC ( RCEC* rcec ) {
4271 tl_assert(rcec->magic == RCEC_MAGIC);
4272 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4273}
4274//////////// END RCEC pool allocator
4275
sewardjf98e1c02008-10-25 16:22:41 +00004276static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4277
philippecabdbb52015-04-20 21:33:16 +00004278/* Count of allocated RCEC having ref count > 0 */
4279static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004280
4281/* Gives an arbitrary total order on RCEC .frames fields */
4282static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4283 Word i;
4284 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4285 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004286 if (ec1->frames_hash < ec2->frames_hash) return -1;
4287 if (ec1->frames_hash > ec2->frames_hash) return 1;
4288 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004289 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004290 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004291 }
4292 return 0;
4293}
4294
4295
4296/* Dec the ref of this RCEC. */
4297static void ctxt__rcdec ( RCEC* ec )
4298{
4299 stats__ctxt_rcdec_calls++;
4300 tl_assert(ec && ec->magic == RCEC_MAGIC);
4301 tl_assert(ec->rc > 0);
4302 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00004303 if (ec->rc == 0)
4304 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00004305}
4306
4307static void ctxt__rcinc ( RCEC* ec )
4308{
4309 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00004310 if (ec->rc == 0)
4311 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00004312 ec->rc++;
4313}
4314
4315
4316/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4317 move it one step closer the the front of the list, so as to make
4318 subsequent searches for it cheaper. */
4319static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4320{
4321 RCEC *ec0, *ec1, *ec2;
4322 if (ec == *headp)
4323 tl_assert(0); /* already at head of list */
4324 tl_assert(ec != NULL);
4325 ec0 = *headp;
4326 ec1 = NULL;
4327 ec2 = NULL;
4328 while (True) {
4329 if (ec0 == NULL || ec0 == ec) break;
4330 ec2 = ec1;
4331 ec1 = ec0;
4332 ec0 = ec0->next;
4333 }
4334 tl_assert(ec0 == ec);
4335 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4336 RCEC* tmp;
4337 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4338 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4339 closer to the start of the list. */
4340 tl_assert(ec2->next == ec1);
4341 tl_assert(ec1->next == ec0);
4342 tmp = ec0->next;
4343 ec2->next = ec0;
4344 ec0->next = ec1;
4345 ec1->next = tmp;
4346 }
4347 else
4348 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4349 /* it's second in the list. */
4350 tl_assert(*headp == ec1);
4351 tl_assert(ec1->next == ec0);
4352 ec1->next = ec0->next;
4353 ec0->next = ec1;
4354 *headp = ec0;
4355 }
4356}
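
/* For example (illustrative): a hash-chain A -> B -> C with a hit on C
   becomes A -> C -> B, so repeatedly-hit RCECs migrate towards the
   head of their chain, one swap per lookup. */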
4357
4358
4359/* Find the given RCEC in the tree, and return a pointer to it. Or,
4360 if not present, add the given one to the tree (by making a copy of
4361 it, so the caller can immediately deallocate the original) and
4362 return a pointer to the copy. The caller can safely have 'example'
4363 on its stack, since we will always return a pointer to a copy of
4364 it, not to the original. Note that the inserted node will have .rc
 4365 of zero and so the caller must immediately increment it. */
4366__attribute__((noinline))
4367static RCEC* ctxt__find_or_add ( RCEC* example )
4368{
4369 UWord hent;
4370 RCEC* copy;
4371 tl_assert(example && example->magic == RCEC_MAGIC);
4372 tl_assert(example->rc == 0);
4373
4374 /* Search the hash table to see if we already have it. */
4375 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004376 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004377 copy = contextTab[hent];
4378 while (1) {
4379 if (!copy) break;
4380 tl_assert(copy->magic == RCEC_MAGIC);
4381 stats__ctxt_tab_cmps++;
4382 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4383 copy = copy->next;
4384 }
4385
4386 if (copy) {
4387 tl_assert(copy != example);
4388 /* optimisation: if it's not at the head of its list, move 1
4389 step fwds, to make future searches cheaper */
4390 if (copy != contextTab[hent]) {
4391 move_RCEC_one_step_forward( &contextTab[hent], copy );
4392 }
4393 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004394 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004395 tl_assert(copy != example);
4396 *copy = *example;
4397 copy->next = contextTab[hent];
4398 contextTab[hent] = copy;
4399 stats__ctxt_tab_curr++;
4400 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4401 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4402 }
4403 return copy;
4404}
4405
4406static inline UWord ROLW ( UWord w, Int n )
4407{
4408 Int bpw = 8 * sizeof(UWord);
4409 w = (w << n) | (w >> (bpw-n));
4410 return w;
4411}
4412
4413__attribute__((noinline))
4414static RCEC* get_RCEC ( Thr* thr )
4415{
4416 UWord hash, i;
4417 RCEC example;
4418 example.magic = RCEC_MAGIC;
4419 example.rc = 0;
4420 example.rcX = 0;
florian195623b2013-01-22 00:25:05 +00004421 example.next = NULL;
njn6c83d5e2009-05-05 23:46:24 +00004422 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004423 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004424 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004425 hash ^= example.frames[i];
4426 hash = ROLW(hash, 19);
4427 }
njn6c83d5e2009-05-05 23:46:24 +00004428 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004429 return ctxt__find_or_add( &example );
4430}
4431
4432///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004433//// Part (2):
4434/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004435///
4436
sewardjffce8152011-06-24 10:09:41 +00004437/* Records an access: a thread (thrid), a stack trace (rcec), the set
 4438 of locks held for writing (locksHeldW), and the access size and
 4439 writeness. The size (1,2,4,8) is encoded as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004440*/
sewardjffce8152011-06-24 10:09:41 +00004441typedef
4442 struct {
4443 RCEC* rcec;
4444 WordSetID locksHeldW;
4445 UInt thrid : SCALARTS_N_THRBITS;
4446 UInt szLg2B : 2;
4447 UInt isW : 1;
4448 }
4449 Thr_n_RCEC;
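
/* A minimal sketch, not part of the original source, of the .szLg2B
   encoding: the log2 of the access size is kept in two bits and the
   size recovered with a shift.  event_map_bind below performs the
   same encoding inline. */
#if 0
static inline UInt encode_szLg2B ( SizeT szB ) {
   switch (szB) {
      case 1: return 0; case 2: return 1;
      case 4: return 2; case 8: return 3;
      default: tl_assert(0); return 0; /*NOTREACHED*/
   }
}
static inline SizeT decode_szB ( UInt szLg2B ) {
   return (SizeT)1 << szLg2B;
}
#endif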
sewardjf98e1c02008-10-25 16:22:41 +00004450
sewardj849b0ed2008-12-21 10:43:10 +00004451#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004452
4453typedef
philippecabdbb52015-04-20 21:33:16 +00004454 struct OldRef {
4455 struct OldRef *prev; // to refs older than this one
4456 struct OldRef *next; // to refs newer than this one
4457 Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
sewardjffce8152011-06-24 10:09:41 +00004458 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004459 Thr_n_RCEC accs[N_OLDREF_ACCS];
4460 }
4461 OldRef;
philippecabdbb52015-04-20 21:33:16 +00004462/* We need ga in OldRef in order to remove OldRef from the sparsewa
4463 by key (i.e. ga) when re-using the lru OldRef. */
sewardjd86e3a22008-12-03 11:39:37 +00004464
philippe6643e962012-01-17 21:16:30 +00004465//////////// BEGIN OldRef pool allocator
4466static PoolAlloc* oldref_pool_allocator;
philippecabdbb52015-04-20 21:33:16 +00004467// Note: we only allocate elements in this pool allocator; we never free them.
4468// We stop allocating elements at HG_(clo_conflict_cache_size).
philippe6643e962012-01-17 21:16:30 +00004469//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004470
philippecabdbb52015-04-20 21:33:16 +00004471static OldRef mru;
4472static OldRef lru;
4473// A doubly linked list, chaining all OldRefs in mru/lru order.
4474// mru/lru are sentinel nodes.
4475// When a new oldref is needed, it is allocated from the pool
4476// if we have not yet reached --conflict-cache-size.
4477// Otherwise, if all oldrefs have already been allocated,
4478// the least recently used (i.e. pointed to by lru.next) is re-used.
4479// Whenever an OldRef is used or re-used, it is moved to the most
4480// recently used position (i.e. pointed to by mru.prev).
4483
4484// Removes r from the double linked list
4485// Note: we do not need to test for special cases such as
4486// NULL next or prev pointers, because we have sentinel nodes
4487// at both sides of the list. So, a node is always forward and
4488// backward linked.
4489static inline void OldRef_unchain(OldRef *r)
4490{
4491 r->next->prev = r->prev;
4492 r->prev->next = r->next;
4493}
4494
4495// Insert new as the newest OldRef
4496// Similarly to OldRef_unchain, no need to test for NULL
4497// pointers, as e.g. mru.prev is always guaranteed to point
4498// to a non NULL node (lru when the list is empty).
4499static inline void OldRef_newest(OldRef *new)
4500{
4501 new->next = &mru;
4502 new->prev = mru.prev;
4503 mru.prev = new;
4504 new->prev->next = new;
4505}
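
/* Taken together, these two helpers give sentinel-based LRU
   maintenance: promoting an entry to most-recently-used is an
   unchain followed by re-insertion at the mru end, exactly as
   event_map_bind does on every hit.  Illustrative sketch only: */
#if 0
   OldRef_unchain(ref);   /* cut ref out of its current position */
   OldRef_newest(ref);    /* re-link it just before the mru sentinel */
#endif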
sewardjd86e3a22008-12-03 11:39:37 +00004506
sewardjbc307e52008-12-06 22:10:54 +00004507static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
sewardjbc307e52008-12-06 22:10:54 +00004508static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
philippecabdbb52015-04-20 21:33:16 +00004509/* Note: the number of refs in the oldrefTree will always equal the
4510 number of elements allocated from the OldRef pool allocator, as we
4511 never free an OldRef: we just re-use them. */
4512
4513
4514/* Allocates a new OldRef, or re-uses the lru one if all allowed
4515 OldRefs have already been allocated. */
4516static OldRef* alloc_or_reuse_OldRef ( void )
4517{
4518 if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
4519 oldrefTreeN++;
4520 return VG_(allocEltPA) ( oldref_pool_allocator );
4521 } else {
4522 Bool b;
4523 UWord valW;
4524 OldRef *oldref = lru.next;
4525
4526 OldRef_unchain(oldref);
4527 b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
4528 tl_assert(b);
4529 tl_assert (oldref == (OldRef*)valW);
4530
4531 for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
4532 ThrID aThrID = oldref->accs[i].thrid;
4533 RCEC* aRef = oldref->accs[i].rcec;
4534 if (aRef) {
4535 tl_assert(aThrID != 0);
4536 stats__ctxt_rcdec3++;
4537 ctxt__rcdec( aRef );
4538 } else {
4539 tl_assert(aThrID == 0);
4540 }
4541 }
4542 return oldref;
4543 }
4544}
4545
sewardjf98e1c02008-10-25 16:22:41 +00004546
sewardj1669cc72008-12-13 01:20:21 +00004547inline static UInt min_UInt ( UInt a, UInt b ) {
4548 return a < b ? a : b;
4549}
4550
sewardja781be62008-12-08 00:12:28 +00004551/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4552 first interval is lower, 1 if the first interval is higher, and 0
4553 if there is any overlap. Redundant paranoia with casting is there
4554 following what looked distinctly like a bug in gcc-4.1.2, in which
4555 some of the comparisons were done signedly instead of
4556 unsignedly. */
4557/* Copied from exp-ptrcheck/sg_main.c */
4558static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4559 Addr a2, SizeT n2 ) {
4560 UWord a1w = (UWord)a1;
4561 UWord n1w = (UWord)n1;
4562 UWord a2w = (UWord)a2;
4563 UWord n2w = (UWord)n2;
4564 tl_assert(n1w > 0 && n2w > 0);
4565 if (a1w + n1w <= a2w) return -1L;
4566 if (a2w + n2w <= a1w) return 1L;
4567 return 0;
4568}
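
/* For example (added illustration):
   cmp_nonempty_intervals(0x1000,4, 0x1004,4) == -1, the intervals
   being disjoint with the first lower, whereas
   cmp_nonempty_intervals(0x1000,4, 0x1002,8) == 0, since
   [0x1000,0x1004) and [0x1002,0x100A) overlap. */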
4569
sewardjc5ea9962008-12-07 01:41:46 +00004570static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004571{
sewardjd86e3a22008-12-03 11:39:37 +00004572 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004573 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004574 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004575 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004576 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004577
sewardjffce8152011-06-24 10:09:41 +00004578 tl_assert(thr);
4579 ThrID thrid = thr->thrid;
4580 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4581
4582 WordSetID locksHeldW = thr->hgthread->locksetW;
4583
sewardjc5ea9962008-12-07 01:41:46 +00004584 rcec = get_RCEC( thr );
4585 ctxt__rcinc(rcec);
4586
sewardjffce8152011-06-24 10:09:41 +00004587 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004588 switch (szB) {
4589 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004590 case 1: szLg2B = 0; break;
4591 case 2: szLg2B = 1; break;
4592 case 4: szLg2B = 2; break;
4593 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004594 default: tl_assert(0);
4595 }
4596
sewardjffce8152011-06-24 10:09:41 +00004597 /* Look in the map to see if we already have a record for this
4598 address. */
philippe40648e22015-04-11 11:42:22 +00004599 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004600
sewardjd86e3a22008-12-03 11:39:37 +00004601 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004602
4603 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004604 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004605 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004606 ref = (OldRef*)valW;
philippecabdbb52015-04-20 21:33:16 +00004607
4608 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004609
sewardjf98e1c02008-10-25 16:22:41 +00004610 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004611 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004612 continue;
sewardjffce8152011-06-24 10:09:41 +00004613 if (ref->accs[i].szLg2B != szLg2B)
4614 continue;
4615 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004616 continue;
4617 /* else we have a match, so stop looking. */
4618 break;
sewardjf98e1c02008-10-25 16:22:41 +00004619 }
4620
4621 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004622 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004623 if (i > 0) {
4624 Thr_n_RCEC tmp = ref->accs[i-1];
4625 ref->accs[i-1] = ref->accs[i];
4626 ref->accs[i] = tmp;
4627 i--;
4628 }
sewardjc5ea9962008-12-07 01:41:46 +00004629 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004630 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004631 ctxt__rcdec( ref->accs[i].rcec );
4632 tl_assert(ref->accs[i].thrid == thrid);
4633 /* Update the RCEC and the W-held lockset. */
4634 ref->accs[i].rcec = rcec;
4635 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004636 } else {
sewardjffce8152011-06-24 10:09:41 +00004637 /* No entry for this (thread, R/W, size, nWHeld) quad.
4638 Shuffle all of them down one slot, and put the new entry
4639 at the start of the array. */
4640 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004641 /* the last slot is in use. We must dec the rc on the
4642 associated rcec. */
4643 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4644 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004645 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4646 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004647 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004648 } else {
4649 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4650 }
4651 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4652 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004653 ref->accs[0].thrid = thrid;
4654 ref->accs[0].szLg2B = szLg2B;
4655 ref->accs[0].isW = (UInt)(isW & 1);
4656 ref->accs[0].locksHeldW = locksHeldW;
4657 ref->accs[0].rcec = rcec;
4658 /* thrid==0 is used to signify an empty slot, so we can't
4659 add zero thrid (such a ThrID is invalid anyway). */
4660 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004661 }
4662
philippecabdbb52015-04-20 21:33:16 +00004663 OldRef_unchain(ref);
4664 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004665
4666 } else {
4667
4668 /* We don't have a record for this address. Create a new one. */
philippecabdbb52015-04-20 21:33:16 +00004669 ref = alloc_or_reuse_OldRef();
4670 ref->ga = a;
sewardjffce8152011-06-24 10:09:41 +00004671 ref->accs[0].thrid = thrid;
4672 ref->accs[0].szLg2B = szLg2B;
4673 ref->accs[0].isW = (UInt)(isW & 1);
4674 ref->accs[0].locksHeldW = locksHeldW;
4675 ref->accs[0].rcec = rcec;
4676
4677 /* thrid==0 is used to signify an empty slot, so we can't
4678 add zero thrid (such a ThrID is invalid anyway). */
4679 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4680
4681 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004682 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004683 ref->accs[j].rcec = NULL;
4684 ref->accs[j].thrid = 0;
4685 ref->accs[j].szLg2B = 0;
4686 ref->accs[j].isW = 0;
4687 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004688 }
sewardjbc307e52008-12-06 22:10:54 +00004689 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
philippecabdbb52015-04-20 21:33:16 +00004690 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004691 }
4692}
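
/* Illustrative summary, added: the accs[] array is itself a tiny
   move-to-front cache.  A matching (thrid, size, R/W) entry is
   swapped one slot towards accs[0]; a miss shuffles every entry down
   one slot, evicts accs[N_OLDREF_ACCS-1] (dropping its RCEC
   reference), and installs the new access at accs[0]. */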
4693
4694
sewardjffce8152011-06-24 10:09:41 +00004695/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004696Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004697 /*OUT*/Thr** resThr,
4698 /*OUT*/SizeT* resSzB,
4699 /*OUT*/Bool* resIsW,
4700 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004701 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004702{
sewardja781be62008-12-08 00:12:28 +00004703 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004704 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004705 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004706 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004707
sewardjffce8152011-06-24 10:09:41 +00004708 ThrID cand_thrid;
4709 RCEC* cand_rcec;
4710 Bool cand_isW;
4711 SizeT cand_szB;
4712 WordSetID cand_locksHeldW;
4713 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004714
4715 Addr toCheck[15];
4716 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004717
4718 tl_assert(thr);
4719 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004720
sewardjffce8152011-06-24 10:09:41 +00004721 ThrID thrid = thr->thrid;
4722
sewardja781be62008-12-08 00:12:28 +00004723 toCheck[nToCheck++] = a;
4724 for (i = -7; i < (Word)szB; i++) {
4725 if (i != 0)
4726 toCheck[nToCheck++] = a + i;
4727 }
4728 tl_assert(nToCheck <= 15);
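   /* So for szB == 8 the candidates are a-7 .. a+7, i.e. every start
      address whose recorded access (at most 8 bytes) could overlap
      [a, a+szB). */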
4729
4730 /* Now see if we can find a suitable matching event for
4731 any of the addresses in toCheck[0 .. nToCheck-1]. */
4732 for (j = 0; j < nToCheck; j++) {
4733
4734 cand_a = toCheck[j];
4735 // VG_(printf)("test %ld %p\n", j, cand_a);
4736
philippe40648e22015-04-11 11:42:22 +00004737 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004738 if (!b)
4739 continue;
4740
sewardjd86e3a22008-12-03 11:39:37 +00004741 ref = (OldRef*)valW;
sewardjffce8152011-06-24 10:09:41 +00004742 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004743
sewardjffce8152011-06-24 10:09:41 +00004744 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4745 cand_rcec = NULL;
4746 cand_isW = False;
4747 cand_szB = 0;
4748 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004749
sewardjc5ea9962008-12-07 01:41:46 +00004750 for (i = 0; i < N_OLDREF_ACCS; i++) {
4751 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004752 cand_rcec = cand->rcec;
4753 cand_thrid = cand->thrid;
4754 cand_isW = (Bool)cand->isW;
4755 cand_szB = 1 << cand->szLg2B;
4756 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004757
sewardjffce8152011-06-24 10:09:41 +00004758 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004759 /* This slot isn't in use. Ignore it. */
4760 continue;
4761
sewardjffce8152011-06-24 10:09:41 +00004762 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004763 /* This is an access by the same thread, but we're only
4764 interested in accesses from other threads. Ignore. */
4765 continue;
4766
4767 if ((!cand_isW) && (!isW))
4768 /* We don't want to report a read racing against another
4769 read; that's stupid. So in this case move on. */
4770 continue;
4771
sewardja781be62008-12-08 00:12:28 +00004772 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4773 /* No overlap with the access we're asking about. Ignore. */
4774 continue;
4775
sewardjc5ea9962008-12-07 01:41:46 +00004776 /* We have a match. Stop searching. */
4777 break;
4778 }
4779
4780 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4781
sewardja781be62008-12-08 00:12:28 +00004782 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004783 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004784 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004785 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004786 tl_assert(cand_rcec);
4787 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4788 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004789 /* Count how many non-zero frames we have. */
4790 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4791 for (n = 0; n < maxNFrames; n++) {
4792 if (0 == cand_rcec->frames[n]) break;
4793 }
sewardjffce8152011-06-24 10:09:41 +00004794 *resEC = VG_(make_ExeContext_from_StackTrace)
4795 (cand_rcec->frames, n);
4796 *resThr = Thr__from_ThrID(cand_thrid);
4797 *resSzB = cand_szB;
4798 *resIsW = cand_isW;
4799 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004800 return True;
4801 }
sewardjc5ea9962008-12-07 01:41:46 +00004802
sewardja781be62008-12-08 00:12:28 +00004803 /* consider next address in toCheck[] */
4804 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004805
sewardja781be62008-12-08 00:12:28 +00004806 /* really didn't find anything. */
4807 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004808}
4809
4810static void event_map_init ( void )
4811{
4812 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004813
philippe6643e962012-01-17 21:16:30 +00004814 /* Context (RCEC) pool allocator */
4815 rcec_pool_allocator = VG_(newPA) (
4816 sizeof(RCEC),
4817 1000 /* RCECs per pool */,
4818 HG_(zalloc),
4819 "libhb.event_map_init.1 (RCEC pools)",
4820 HG_(free)
4821 );
sewardjd86e3a22008-12-03 11:39:37 +00004822
4823 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004824 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004825 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004826 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004827 for (i = 0; i < N_RCEC_TAB; i++)
4828 contextTab[i] = NULL;
4829
philippe6643e962012-01-17 21:16:30 +00004830 /* Oldref pool allocator */
4831 oldref_pool_allocator = VG_(newPA)(
4832 sizeof(OldRef),
4833 1000 /* OldRefs per pool */,
4834 HG_(zalloc),
4835 "libhb.event_map_init.3 (OldRef pools)",
4836 HG_(free)
4837 );
sewardjd86e3a22008-12-03 11:39:37 +00004838
sewardjd86e3a22008-12-03 11:39:37 +00004839 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004840 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004841 oldrefTree = VG_(newSWA)(
4842 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004843 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004844 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004845 );
sewardjf98e1c02008-10-25 16:22:41 +00004846
sewardjf98e1c02008-10-25 16:22:41 +00004847 oldrefTreeN = 0;
philippecabdbb52015-04-20 21:33:16 +00004848 mru.prev = &lru;
4849 mru.next = NULL;
4850 lru.prev = NULL;
4851 lru.next = &mru;
4852 for (i = 0; i < N_OLDREF_ACCS; i++) {
4853 mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
4854 .locksHeldW = 0,
4855 .thrid = 0,
4856 .szLg2B = 0,
4857 .isW = 0};
4858 lru.accs[i] = mru.accs[i];
4859 }
sewardjf98e1c02008-10-25 16:22:41 +00004860}
4861
philippecabdbb52015-04-20 21:33:16 +00004862static void event_map__check_reference_counts ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004863{
4864 RCEC* rcec;
4865 OldRef* oldref;
4866 Word i;
4867 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004868 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004869
4870 /* Set the 'check' reference counts to zero. Also, optionally
4871 check that the real reference counts are non-zero. We allow
4872 these to fall to zero before a GC, but the GC must get rid of
4873 all those that are zero, hence none should be zero after a
4874 GC. */
4875 for (i = 0; i < N_RCEC_TAB; i++) {
4876 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4877 nEnts++;
4878 tl_assert(rcec);
4879 tl_assert(rcec->magic == RCEC_MAGIC);
sewardjf98e1c02008-10-25 16:22:41 +00004880 rcec->rcX = 0;
4881 }
4882 }
4883
4884 /* check that the stats are sane */
4885 tl_assert(nEnts == stats__ctxt_tab_curr);
4886 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4887
4888 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004889 VG_(initIterSWA)( oldrefTree );
4890 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004891 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004892 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004893 ThrID aThrID = oldref->accs[i].thrid;
4894 RCEC* aRef = oldref->accs[i].rcec;
4895 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004896 tl_assert(aRef);
4897 tl_assert(aRef->magic == RCEC_MAGIC);
4898 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004899 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004900 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004901 }
4902 }
4903 }
4904
4905 /* compare check ref counts with actual */
4906 for (i = 0; i < N_RCEC_TAB; i++) {
4907 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4908 tl_assert(rcec->rc == rcec->rcX);
4909 }
4910 }
4911}
4912
sewardj8fd92d32008-11-20 23:17:01 +00004913__attribute__((noinline))
philippecabdbb52015-04-20 21:33:16 +00004914static void do_RCEC_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004915{
philippecabdbb52015-04-20 21:33:16 +00004916 UInt i;
sewardjf98e1c02008-10-25 16:22:41 +00004917
philippecabdbb52015-04-20 21:33:16 +00004918 if (VG_(clo_stats)) {
4919 static UInt ctr = 1;
4920 VG_(message)(Vg_DebugMsg,
4921 "libhb: RCEC GC: #%u %lu slots,"
4922 " %lu cur ents(ref'd %lu),"
4923 " %lu max ents\n",
4924 ctr++,
4925 (UWord)N_RCEC_TAB,
4926 stats__ctxt_tab_curr, RCEC_referenced,
4927 stats__ctxt_tab_max );
sewardjf98e1c02008-10-25 16:22:41 +00004928 }
philippecabdbb52015-04-20 21:33:16 +00004929 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004930
4931 /* Throw away all RCECs with zero reference counts */
4932 for (i = 0; i < N_RCEC_TAB; i++) {
4933 RCEC** pp = &contextTab[i];
4934 RCEC* p = *pp;
4935 while (p) {
4936 if (p->rc == 0) {
4937 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004938 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004939 p = *pp;
4940 tl_assert(stats__ctxt_tab_curr > 0);
philippe06bc23a2015-04-17 21:19:43 +00004941 stats__ctxt_rcdec_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004942 stats__ctxt_tab_curr--;
4943 } else {
4944 pp = &p->next;
4945 p = p->next;
4946 }
4947 }
4948 }
4949
philippecabdbb52015-04-20 21:33:16 +00004950 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004951}
4952
sewardjf98e1c02008-10-25 16:22:41 +00004953/////////////////////////////////////////////////////////
4954// //
4955// Core MSM //
4956// //
4957/////////////////////////////////////////////////////////
4958
sewardj23f12002009-07-24 08:45:08 +00004959/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4960 Nov 08, and again after [...],
4961 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004962
sewardj23f12002009-07-24 08:45:08 +00004963static ULong stats__msmcread = 0;
4964static ULong stats__msmcread_change = 0;
4965static ULong stats__msmcwrite = 0;
4966static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004967
sewardj8ab2c132009-08-02 09:34:35 +00004968/* Some notes on the H1 history mechanism:
4969
4970 Transition rules are:
4971
4972 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4973 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4974
4975 After any access by a thread T to a location L, L's constraint pair
4976 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4977
4978 After a race by thread T conflicting with some previous access by
4979 some other thread U, for a location with constraint (before
4980 processing the later access) (Cr,Cw), then Cw[U] is the segment in
4981 which the previous access lies.
4982
4983 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4984 are compared so as to find out which thread(s) this access
4985 conflicts with. Once that is established, we also require the
4986 pre-update Cw for the location, so we can index into it for those
4987 threads, to get the scalar clock values for the point at which the
4988 former accesses were made. (In fact we only bother to do any of
4989 this for an arbitrarily chosen one of the conflicting threads, as
4990 that's simpler, it avoids flooding the user with vast amounts of
4991 mostly useless information, and because the program is wrong if it
4992 contains any races at all -- so we don't really need to show all
4993 conflicting access pairs initially, so long as we only show none if
4994 none exist).
4995
4996 ---
4997
4998 That requires the auxiliary proof that
4999
5000 (Cr `join` Kw)[T] == Kw[T]
5001
5002 Why should that be true? Because for any thread T, Kw[T] >= the
5003 scalar clock value for T known by any other thread. In other
5004 words, because T's value for its own scalar clock is at least as up
5005 to date as the value for it known by any other thread (that is true
5006 for both the R- and W- scalar clocks). Hence no other thread will
5007 be able to feed in a value for that element (indirectly via a
5008 constraint) which will exceed Kw[T], and hence the join cannot
5009 cause that particular element to advance.
5010*/
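
/* A worked example of the rules above (illustrative, not part of the
   original source).  Write vector clocks as [T1,T2].  Suppose T1,
   with Kr = Kw = [1,0], writes a fresh location:

      (Cr,Cw) = ([1,0],[1,0])

   If T2 then synchronises with T1, so that its clocks become
   Kr = Kw = [1,1], and reads the location, the check Cr <= Kr
   ( [1,0] <= [1,1] ) passes and

      read gives (Cr, Cr `join` Kw) = ([1,0],[1,1])

   A subsequent write by T2 passes the check Cw <= Kw and gives

      (Cr `join` Kw, Cr `join` Kw) = ([1,1],[1,1])

   so that Cw[T2] == 1 == T2's Kw[T2], as claimed above.  Had T2
   instead accessed the location without synchronising (Kr = Kw =
   [0,1]), the check Cr <= Kr ( [1,0] <= [0,1] ) would fail and a
   race would be reported. */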
5011
sewardjf98e1c02008-10-25 16:22:41 +00005012__attribute__((noinline))
5013static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00005014 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00005015 VtsID Cfailed,
5016 VtsID Kfailed,
5017 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00005018{
sewardjc5ea9962008-12-07 01:41:46 +00005019 /* Call here to report a race. We just hand it onwards to
5020 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00005021 error is going to be collected, then, at history_level 2, that
5022 queries the conflicting-event map. The alternative would be to
5023 query it right here. But that causes a lot of pointless queries
5024 for errors which will shortly be discarded as duplicates, and
5025 can become a performance overhead; so we defer the query until
5026 we know the error is not a duplicate. */
5027
5028 /* Stacks for the bounds of the (or one of the) conflicting
5029 segment(s). These are only set at history_level 1. */
5030 ExeContext* hist1_seg_start = NULL;
5031 ExeContext* hist1_seg_end = NULL;
5032 Thread* hist1_conf_thr = NULL;
5033
5034 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00005035 tl_assert(acc_thr->hgthread);
5036 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00005037 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5038
5039 if (HG_(clo_history_level) == 1) {
5040 Bool found;
5041 Word firstIx, lastIx;
5042 ULong_n_EC key;
5043
5044 /* At history_level 1, we must round up the relevant stack-pair
5045 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00005046 deferring it is complex; we can't (easily) put Kfailed and
5047 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00005048 getting tied up in difficulties with VtsID reference
5049 counting. So just do it now. */
5050 Thr* confThr;
5051 ULong confTym = 0;
5052 /* Which thread are we in conflict with? There may be more than
5053 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5054 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00005055 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00005056 /* This must exist! since if it was NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00005057 conflict (semantics of return value of
5058 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5059 called us, just checked exactly this -- that there was in
5060 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00005061 tl_assert(confThr);
5062
5063 /* Get the scalar clock value that the conflicting thread
5064 introduced into the constraint. A careful examination of the
5065 base machine rules shows that this must be the same as the
5066 conflicting thread's scalar clock when it created this
5067 constraint. Hence we know the scalar clock of the
5068 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00005069 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00005070
5071 /* Using this scalar clock, index into the conflicting thread's
5072 collection of stack traces made each time its vector clock
5073 (hence its scalar clock) changed. This gives the stack
5074 traces at the start and end of the conflicting segment (well,
5075 as per comment just above, of one of the conflicting
5076 segments, if there are more than one). */
5077 key.ull = confTym;
5078 key.ec = NULL;
5079 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00005080 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00005081 firstIx = lastIx = 0;
5082 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00005083 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005084 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00005085 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00005086 );
sewardj8ab2c132009-08-02 09:34:35 +00005087 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00005088 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00005089 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00005090 confThr, confTym, found, firstIx, lastIx);
5091 /* We can't indefinitely collect stack traces at VTS
5092 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00005093 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00005094 ones, which in turn means we might fail to find index value
5095 confTym in the array. */
5096 if (found) {
5097 ULong_n_EC *pair_start, *pair_end;
5098 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00005099 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00005100 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005101 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00005102 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00005103 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005104 lastIx+1 );
5105 /* from properties of VG_(lookupXA) and the comparison fn used: */
5106 tl_assert(pair_start->ull < pair_end->ull);
5107 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005108 /* Could do a bit better here. It may be that pair_end
5109 doesn't have a stack, but the following entries in the
5110 array have the same scalar Kw and do have a stack. So
5111 we should search a bit further along the array than
5112 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00005113 } else {
sewardjffce8152011-06-24 10:09:41 +00005114 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00005115 hist1_seg_end = main_get_EC( confThr );
5116 }
5117 // seg_start could be NULL iff this is the first stack in the thread
5118 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5119 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00005120 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00005121 }
5122 }
5123
sewardj60626642011-03-10 15:14:37 +00005124 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00005125 szB, isWrite,
5126 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00005127}
5128
5129static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00005130 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00005131 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00005132 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5133 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00005134}
5135
5136
5137/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00005138static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005139 /* The following are only needed for
5140 creating error reports. */
5141 Thr* acc_thr,
5142 Addr acc_addr, SizeT szB )
5143{
5144 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005145 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00005146
5147 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005148 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005149 tl_assert(is_sane_SVal_C(svOld));
5150 }
5151
sewardj1c0ce7a2009-07-01 08:10:49 +00005152 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005153 VtsID tviR = acc_thr->viR;
5154 VtsID tviW = acc_thr->viW;
5155 VtsID rmini = SVal__unC_Rmin(svOld);
5156 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005157 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5158 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005159 /* no race */
5160 /* Note: RWLOCK subtlety: use tviW, not tviR */
5161 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5162 goto out;
5163 } else {
sewardjb0e009d2008-11-19 16:35:15 +00005164 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005165 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5166 tl_assert(leqxx);
5167 // same as in non-race case
5168 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5169 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005170 rmini, /* Cfailed */
5171 tviR, /* Kfailed */
5172 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005173 goto out;
5174 }
5175 }
5176 if (SVal__isA(svOld)) {
5177 /* reading no-access memory (sigh); leave unchanged */
5178 /* check for no pollution */
5179 tl_assert(svOld == SVal_NOACCESS);
5180 svNew = SVal_NOACCESS;
5181 goto out;
5182 }
sewardj23f12002009-07-24 08:45:08 +00005183 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005184 tl_assert(0);
5185
5186 out:
sewardj8f5374e2008-12-07 11:40:17 +00005187 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005188 tl_assert(is_sane_SVal_C(svNew));
5189 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005190 if (UNLIKELY(svNew != svOld)) {
5191 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005192 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005193 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005194 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005195 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005196 }
5197 }
5198 return svNew;
5199}
5200
5201
5202/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005203static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005204 /* The following are only needed for
5205 creating error reports. */
5206 Thr* acc_thr,
5207 Addr acc_addr, SizeT szB )
5208{
5209 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005210 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005211
5212 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005213 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005214 tl_assert(is_sane_SVal_C(svOld));
5215 }
5216
sewardj1c0ce7a2009-07-01 08:10:49 +00005217 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005218 VtsID tviW = acc_thr->viW;
5219 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005220 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5221 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005222 /* no race */
5223 svNew = SVal__mkC( tviW, tviW );
5224 goto out;
5225 } else {
5226 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005227 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005228 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5229 tl_assert(leqxx);
5230 // same as in non-race case
5231 // proof: in the non-race case, we have
5232 // rmini <= wmini (invar on constraints)
5233 // tviW <= tviR (invar on thread clocks)
5234 // wmini <= tviW (from run-time check)
5235 // hence from transitivity of <= we have
5236 // rmini <= wmini <= tviW
5237 // and so join(rmini,tviW) == tviW
5238 // and join(wmini,tviW) == tviW
5239 // qed.
5240 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5241 VtsID__join2(wmini, tviW) );
5242 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005243 wmini, /* Cfailed */
5244 tviW, /* Kfailed */
5245 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005246 goto out;
5247 }
5248 }
5249 if (SVal__isA(svOld)) {
5250 /* writing no-access memory (sigh); leave unchanged */
5251 /* check for no pollution */
5252 tl_assert(svOld == SVal_NOACCESS);
5253 svNew = SVal_NOACCESS;
5254 goto out;
5255 }
sewardj23f12002009-07-24 08:45:08 +00005256 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005257 tl_assert(0);
5258
5259 out:
sewardj8f5374e2008-12-07 11:40:17 +00005260 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005261 tl_assert(is_sane_SVal_C(svNew));
5262 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005263 if (UNLIKELY(svNew != svOld)) {
5264 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005265 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005266 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005267 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005268 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005269 }
5270 }
5271 return svNew;
5272}
5273
5274
5275/////////////////////////////////////////////////////////
5276// //
5277// Apply core MSM to specific memory locations //
5278// //
5279/////////////////////////////////////////////////////////
5280
sewardj23f12002009-07-24 08:45:08 +00005281/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005282
sewardj23f12002009-07-24 08:45:08 +00005283static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005284 CacheLine* cl;
5285 UWord cloff, tno, toff;
5286 SVal svOld, svNew;
5287 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005288 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005289 cl = get_cacheline(a);
5290 cloff = get_cacheline_offset(a);
5291 tno = get_treeno(a);
5292 toff = get_tree_offset(a); /* == 0 .. 7 */
5293 descr = cl->descrs[tno];
5294 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5295 SVal* tree = &cl->svals[tno << 3];
5296 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005297 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005298 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5299 }
5300 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005301 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005302 if (CHECK_ZSM)
5303 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005304 cl->svals[cloff] = svNew;
5305}
5306
sewardj23f12002009-07-24 08:45:08 +00005307static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005308 CacheLine* cl;
5309 UWord cloff, tno, toff;
5310 SVal svOld, svNew;
5311 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005312 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005313 cl = get_cacheline(a);
5314 cloff = get_cacheline_offset(a);
5315 tno = get_treeno(a);
5316 toff = get_tree_offset(a); /* == 0 .. 7 */
5317 descr = cl->descrs[tno];
5318 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5319 SVal* tree = &cl->svals[tno << 3];
5320 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005321 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005322 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5323 }
5324 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005325 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005326 if (CHECK_ZSM)
5327 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005328 cl->svals[cloff] = svNew;
5329}
5330
sewardj23f12002009-07-24 08:45:08 +00005331/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005332
sewardj23f12002009-07-24 08:45:08 +00005333static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005334 CacheLine* cl;
5335 UWord cloff, tno, toff;
5336 SVal svOld, svNew;
5337 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005338 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005339 if (UNLIKELY(!aligned16(a))) goto slowcase;
5340 cl = get_cacheline(a);
5341 cloff = get_cacheline_offset(a);
5342 tno = get_treeno(a);
5343 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5344 descr = cl->descrs[tno];
5345 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5346 if (valid_value_is_below_me_16(descr, toff)) {
5347 goto slowcase;
5348 } else {
5349 SVal* tree = &cl->svals[tno << 3];
5350 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5351 }
sewardj8f5374e2008-12-07 11:40:17 +00005352 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005353 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5354 }
5355 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005356 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005357 if (CHECK_ZSM)
5358 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005359 cl->svals[cloff] = svNew;
5360 return;
5361 slowcase: /* misaligned, or must go further down the tree */
5362 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005363 zsm_sapply08__msmcread( thr, a + 0 );
5364 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005365}
5366
sewardj23f12002009-07-24 08:45:08 +00005367static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005368 CacheLine* cl;
5369 UWord cloff, tno, toff;
5370 SVal svOld, svNew;
5371 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005372 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005373 if (UNLIKELY(!aligned16(a))) goto slowcase;
5374 cl = get_cacheline(a);
5375 cloff = get_cacheline_offset(a);
5376 tno = get_treeno(a);
5377 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5378 descr = cl->descrs[tno];
5379 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5380 if (valid_value_is_below_me_16(descr, toff)) {
5381 goto slowcase;
5382 } else {
5383 SVal* tree = &cl->svals[tno << 3];
5384 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5385 }
sewardj8f5374e2008-12-07 11:40:17 +00005386 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005387 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5388 }
5389 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005390 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005391 if (CHECK_ZSM)
5392 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005393 cl->svals[cloff] = svNew;
5394 return;
5395 slowcase: /* misaligned, or must go further down the tree */
5396 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005397 zsm_sapply08__msmcwrite( thr, a + 0 );
5398 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005399}
5400
sewardj23f12002009-07-24 08:45:08 +00005401/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005402
sewardj23f12002009-07-24 08:45:08 +00005403static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005404 CacheLine* cl;
5405 UWord cloff, tno, toff;
5406 SVal svOld, svNew;
5407 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005408 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005409 if (UNLIKELY(!aligned32(a))) goto slowcase;
5410 cl = get_cacheline(a);
5411 cloff = get_cacheline_offset(a);
5412 tno = get_treeno(a);
5413 toff = get_tree_offset(a); /* == 0 or 4 */
5414 descr = cl->descrs[tno];
5415 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5416 if (valid_value_is_above_me_32(descr, toff)) {
5417 SVal* tree = &cl->svals[tno << 3];
5418 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5419 } else {
5420 goto slowcase;
5421 }
sewardj8f5374e2008-12-07 11:40:17 +00005422 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005423 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5424 }
5425 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005426 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005427 if (CHECK_ZSM)
5428 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005429 cl->svals[cloff] = svNew;
5430 return;
5431 slowcase: /* misaligned, or must go further down the tree */
5432 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005433 zsm_sapply16__msmcread( thr, a + 0 );
5434 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005435}
5436
sewardj23f12002009-07-24 08:45:08 +00005437static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005438 CacheLine* cl;
5439 UWord cloff, tno, toff;
5440 SVal svOld, svNew;
5441 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005442 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005443 if (UNLIKELY(!aligned32(a))) goto slowcase;
5444 cl = get_cacheline(a);
5445 cloff = get_cacheline_offset(a);
5446 tno = get_treeno(a);
5447 toff = get_tree_offset(a); /* == 0 or 4 */
5448 descr = cl->descrs[tno];
5449 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5450 if (valid_value_is_above_me_32(descr, toff)) {
5451 SVal* tree = &cl->svals[tno << 3];
5452 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5453 } else {
5454 goto slowcase;
5455 }
sewardj8f5374e2008-12-07 11:40:17 +00005456 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005457 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5458 }
5459 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005460 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005461 if (CHECK_ZSM)
5462 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005463 cl->svals[cloff] = svNew;
5464 return;
5465 slowcase: /* misaligned, or must go further down the tree */
5466 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005467 zsm_sapply16__msmcwrite( thr, a + 0 );
5468 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005469}
5470
sewardj23f12002009-07-24 08:45:08 +00005471/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005472
sewardj23f12002009-07-24 08:45:08 +00005473static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005474 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005475 UWord cloff, tno;
5476 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005477 SVal svOld, svNew;
5478 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005479 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005480 if (UNLIKELY(!aligned64(a))) goto slowcase;
5481 cl = get_cacheline(a);
5482 cloff = get_cacheline_offset(a);
5483 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005484 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005485 descr = cl->descrs[tno];
5486 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5487 goto slowcase;
5488 }
5489 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005490 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005491 if (CHECK_ZSM)
5492 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005493 cl->svals[cloff] = svNew;
5494 return;
5495 slowcase: /* misaligned, or must go further down the tree */
5496 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005497 zsm_sapply32__msmcread( thr, a + 0 );
5498 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005499}
5500
sewardj23f12002009-07-24 08:45:08 +00005501static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005502 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005503 UWord cloff, tno;
5504 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005505 SVal svOld, svNew;
5506 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005507 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005508 if (UNLIKELY(!aligned64(a))) goto slowcase;
5509 cl = get_cacheline(a);
5510 cloff = get_cacheline_offset(a);
5511 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005512 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005513 descr = cl->descrs[tno];
5514 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5515 goto slowcase;
5516 }
5517 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005518 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005519 if (CHECK_ZSM)
5520 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005521 cl->svals[cloff] = svNew;
5522 return;
5523 slowcase: /* misaligned, or must go further down the tree */
5524 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005525 zsm_sapply32__msmcwrite( thr, a + 0 );
5526 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005527}
5528
sewardj23f12002009-07-24 08:45:08 +00005529/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005530
5531static
sewardj23f12002009-07-24 08:45:08 +00005532void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005533 CacheLine* cl;
5534 UWord cloff, tno, toff;
5535 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005536 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005537 cl = get_cacheline(a);
5538 cloff = get_cacheline_offset(a);
5539 tno = get_treeno(a);
5540 toff = get_tree_offset(a); /* == 0 .. 7 */
5541 descr = cl->descrs[tno];
5542 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5543 SVal* tree = &cl->svals[tno << 3];
5544 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005545 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005546 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5547 }
5548 tl_assert(svNew != SVal_INVALID);
5549 cl->svals[cloff] = svNew;
5550}
5551
sewardj23f12002009-07-24 08:45:08 +00005552/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005553
5554static
sewardj23f12002009-07-24 08:45:08 +00005555void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005556 CacheLine* cl;
5557 UWord cloff, tno, toff;
5558 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005559 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005560 if (UNLIKELY(!aligned16(a))) goto slowcase;
5561 cl = get_cacheline(a);
5562 cloff = get_cacheline_offset(a);
5563 tno = get_treeno(a);
5564 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5565 descr = cl->descrs[tno];
5566 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5567 if (valid_value_is_below_me_16(descr, toff)) {
5568 /* Writing at this level. Need to fix up 'descr'. */
5569 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5570 /* At this point, the tree does not match cl->descr[tno] any
5571 more. The assignments below will fix it up. */
5572 } else {
5573 /* We can't indiscriminately write on the w16 node as in the
5574 w64 case, as that might make the node inconsistent with
5575 its parent. So first, pull down to this level. */
5576 SVal* tree = &cl->svals[tno << 3];
5577 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005578 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005579 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5580 }
5581 }
5582 tl_assert(svNew != SVal_INVALID);
5583 cl->svals[cloff + 0] = svNew;
5584 cl->svals[cloff + 1] = SVal_INVALID;
5585 return;
5586 slowcase: /* misaligned */
5587 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005588 zsm_swrite08( a + 0, svNew );
5589 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005590}
5591
sewardj23f12002009-07-24 08:45:08 +00005592/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005593
5594static
sewardj23f12002009-07-24 08:45:08 +00005595void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005596 CacheLine* cl;
5597 UWord cloff, tno, toff;
5598 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005599 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005600 if (UNLIKELY(!aligned32(a))) goto slowcase;
5601 cl = get_cacheline(a);
5602 cloff = get_cacheline_offset(a);
5603 tno = get_treeno(a);
5604 toff = get_tree_offset(a); /* == 0 or 4 */
5605 descr = cl->descrs[tno];
5606 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5607 if (valid_value_is_above_me_32(descr, toff)) {
5608 /* We can't indiscriminately write on the w32 node as in the
5609 w64 case, as that might make the node inconsistent with
5610 its parent. So first, pull down to this level. */
5611 SVal* tree = &cl->svals[tno << 3];
5612 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005613 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005614 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5615 } else {
5616 /* Writing at this level. Need to fix up 'descr'. */
5617 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5618 /* At this point, the tree does not match cl->descr[tno] any
5619 more. The assignments below will fix it up. */
5620 }
5621 }
5622 tl_assert(svNew != SVal_INVALID);
5623 cl->svals[cloff + 0] = svNew;
5624 cl->svals[cloff + 1] = SVal_INVALID;
5625 cl->svals[cloff + 2] = SVal_INVALID;
5626 cl->svals[cloff + 3] = SVal_INVALID;
5627 return;
5628 slowcase: /* misaligned */
5629 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005630 zsm_swrite16( a + 0, svNew );
5631 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005632}
5633
sewardj23f12002009-07-24 08:45:08 +00005634/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005635
5636static
sewardj23f12002009-07-24 08:45:08 +00005637void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005638 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005639 UWord cloff, tno;
5640 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005641 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005642 if (UNLIKELY(!aligned64(a))) goto slowcase;
5643 cl = get_cacheline(a);
5644 cloff = get_cacheline_offset(a);
5645 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005646 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005647 cl->descrs[tno] = TREE_DESCR_64;
5648 tl_assert(svNew != SVal_INVALID);
5649 cl->svals[cloff + 0] = svNew;
5650 cl->svals[cloff + 1] = SVal_INVALID;
5651 cl->svals[cloff + 2] = SVal_INVALID;
5652 cl->svals[cloff + 3] = SVal_INVALID;
5653 cl->svals[cloff + 4] = SVal_INVALID;
5654 cl->svals[cloff + 5] = SVal_INVALID;
5655 cl->svals[cloff + 6] = SVal_INVALID;
5656 cl->svals[cloff + 7] = SVal_INVALID;
5657 return;
5658 slowcase: /* misaligned */
5659 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005660 zsm_swrite32( a + 0, svNew );
5661 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005662}
5663
sewardj23f12002009-07-24 08:45:08 +00005664/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005665
5666static
sewardj23f12002009-07-24 08:45:08 +00005667SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005668 CacheLine* cl;
5669 UWord cloff, tno, toff;
5670 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005671 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005672 cl = get_cacheline(a);
5673 cloff = get_cacheline_offset(a);
5674 tno = get_treeno(a);
5675 toff = get_tree_offset(a); /* == 0 .. 7 */
5676 descr = cl->descrs[tno];
5677 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5678 SVal* tree = &cl->svals[tno << 3];
5679 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5680 }
5681 return cl->svals[cloff];
5682}
5683
sewardj23f12002009-07-24 08:45:08 +00005684static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005685 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005686 stats__cline_scopy08s++;
5687 sv = zsm_sread08( src );
5688 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005689}
5690
5691
sewardj23f12002009-07-24 08:45:08 +00005692/* Block-copy states (needed for implementing realloc()). Note this
5693 doesn't change the filtering arrangements. The caller of
5694 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005695
sewardj23f12002009-07-24 08:45:08 +00005696static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005697{
5698 SizeT i;
5699 if (len == 0)
5700 return;
5701
5702 /* assert for non-overlappingness */
5703 tl_assert(src+len <= dst || dst+len <= src);
5704
5705 /* To be simple, just copy byte by byte. But so as not to wreck
5706 performance for later accesses to dst[0 .. len-1], normalise
5707 destination lines as we finish with them, and also normalise the
5708 line containing the first and last address. */
5709 for (i = 0; i < len; i++) {
5710 Bool normalise
5711 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5712 || i == 0 /* first in range */
5713 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005714 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005715 }
5716}
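
/* Illustrative sketch (not part of the library): how a realloc-style
   handler might drive zsm_scopy_range.  The names old_ga, new_ga and
   survivingszB are hypothetical; note the caller must clear the
   filter for the destination range itself, since zsm_scopy_range
   does not touch the filtering arrangements. */
#if 0
static void example_copy_shadow_for_realloc ( Thr* thr,
                                              Addr old_ga, Addr new_ga,
                                              SizeT survivingszB )
{
   /* copy shadow values for the payload bytes that survive */
   zsm_scopy_range( old_ga, new_ga, survivingszB );
   /* zsm_scopy_range leaves the filter alone; fix that up here */
   Filter__clear_range( thr->filter, new_ga, survivingszB );
}
#endif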
5717
5718
5719/* For setting address ranges to a given value. Has considerable
5720 sophistication so as to avoid generating large numbers of pointless
5721 cache loads/writebacks for large ranges. */
5722
5723/* Do small ranges in-cache, in the obvious way. */
5724static
sewardj23f12002009-07-24 08:45:08 +00005725void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005726{
5727 /* fast track a couple of common cases */
5728 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005729 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005730 return;
5731 }
5732 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005733 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005734 return;
5735 }
5736
5737 /* be completely general (but as efficient as possible) */
5738 if (len == 0) return;
5739
5740 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005741 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005742 a += 1;
5743 len -= 1;
5744 tl_assert(aligned16(a));
5745 }
5746 if (len == 0) return;
5747
5748 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005749 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005750 a += 2;
5751 len -= 2;
5752 tl_assert(aligned32(a));
5753 }
5754 if (len == 0) return;
5755
5756 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005757 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005758 a += 4;
5759 len -= 4;
5760 tl_assert(aligned64(a));
5761 }
5762 if (len == 0) return;
5763
5764 if (len >= 8) {
5765 tl_assert(aligned64(a));
5766 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005767 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005768 a += 8;
5769 len -= 8;
5770 }
5771 tl_assert(aligned64(a));
5772 }
5773 if (len == 0) return;
5774
5775 if (len >= 4)
5776 tl_assert(aligned32(a));
5777 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005778 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005779 a += 4;
5780 len -= 4;
5781 }
5782 if (len == 0) return;
5783
5784 if (len >= 2)
5785 tl_assert(aligned16(a));
5786 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005787 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005788 a += 2;
5789 len -= 2;
5790 }
5791 if (len == 0) return;
5792
5793 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005794 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005795 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005796 len -= 1;
5797 }
5798 tl_assert(len == 0);
5799}
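
/* Worked example of the alignment descent above (illustrative):
   for a = 0x1001 and len = 15, the calls issued are
      zsm_swrite08( 0x1001 )  -->  a = 0x1002, len = 14  (16-bit aligned)
      zsm_swrite16( 0x1002 )  -->  a = 0x1004, len = 12  (32-bit aligned)
      zsm_swrite32( 0x1004 )  -->  a = 0x1008, len = 8   (64-bit aligned)
      zsm_swrite64( 0x1008 )  -->  a = 0x1010, len = 0
   so the range is covered by a minimal number of maximally-sized
   aligned stores. */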
5800
5801
sewardj23f12002009-07-24 08:45:08 +00005802/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005803 for larger ranges, try to operate directly on the out-of-cache
5804 representation, rather than dragging lines into the cache,
5805 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005806 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005807
sewardj23f12002009-07-24 08:45:08 +00005808 Note that this doesn't change the filtering arrangements. The
5809 caller of zsm_sset_range needs to attend to that. */
5810
5811static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005812{
5813 tl_assert(svNew != SVal_INVALID);
5814 stats__cache_make_New_arange += (ULong)len;
5815
5816 if (0 && len > 500)
5817 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5818
5819 if (0) {
5820 static UWord n_New_in_cache = 0;
5821 static UWord n_New_not_in_cache = 0;
5822 /* tag is 'a' with the in-line offset masked out,
5823 eg a[31]..a[4] 0000 */
5824 Addr tag = a & ~(N_LINE_ARANGE - 1);
5825 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5826 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5827 n_New_in_cache++;
5828 } else {
5829 n_New_not_in_cache++;
5830 }
5831 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5832 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5833 n_New_in_cache, n_New_not_in_cache );
5834 }
5835
5836 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005837 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005838 } else {
5839 Addr before_start = a;
5840 Addr aligned_start = cacheline_ROUNDUP(a);
5841 Addr after_start = cacheline_ROUNDDN(a + len);
5842 UWord before_len = aligned_start - before_start;
5843 UWord aligned_len = after_start - aligned_start;
5844 UWord after_len = a + len - after_start;
5845 tl_assert(before_start <= aligned_start);
5846 tl_assert(aligned_start <= after_start);
5847 tl_assert(before_len < N_LINE_ARANGE);
5848 tl_assert(after_len < N_LINE_ARANGE);
5849 tl_assert(get_cacheline_offset(aligned_start) == 0);
5850 if (get_cacheline_offset(a) == 0) {
5851 tl_assert(before_len == 0);
5852 tl_assert(a == aligned_start);
5853 }
5854 if (get_cacheline_offset(a+len) == 0) {
5855 tl_assert(after_len == 0);
5856 tl_assert(after_start == a+len);
5857 }
5858 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005859 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005860 }
5861 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005862 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005863 }
5864 stats__cache_make_New_inZrep += (ULong)aligned_len;
5865
5866 while (1) {
5867 Addr tag;
5868 UWord wix;
5869 if (aligned_start >= after_start)
5870 break;
5871 tl_assert(get_cacheline_offset(aligned_start) == 0);
5872 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5873 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5874 if (tag == cache_shmem.tags0[wix]) {
5875 UWord i;
5876 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005877 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005878 } else {
5879 UWord i;
5880 Word zix;
5881 SecMap* sm;
5882 LineZ* lineZ;
5883 /* This line is not in the cache. Do not force it in; instead
5884 modify it in-place. */
5885 /* find the Z line to write in and rcdec it or the
5886 associated F line. */
5887 find_Z_for_writing( &sm, &zix, tag );
5888 tl_assert(sm);
5889 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5890 lineZ = &sm->linesZ[zix];
5891 lineZ->dict[0] = svNew;
5892 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5893 for (i = 0; i < N_LINE_ARANGE/4; i++)
5894 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5895 rcinc_LineZ(lineZ);
5896 }
5897 aligned_start += N_LINE_ARANGE;
5898 aligned_len -= N_LINE_ARANGE;
5899 }
5900 tl_assert(aligned_start == after_start);
5901 tl_assert(aligned_len == 0);
5902 }
5903}
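
/* Worked example of the splitting above (illustrative; assumes
   N_LINE_ARANGE == 64 for concreteness): for a = 0x10030 and
   len = 0x100:
      before_start  = 0x10030, before_len  = 0x10  (done in-cache)
      aligned_start = 0x10040, aligned_len = 0xC0  (three whole lines,
                                                    done on the Z-rep)
      after_start   = 0x10100, after_len   = 0x30  (done in-cache)
   Only the whole-line middle section takes the direct-to-Zrep path
   that avoids dragging lines through the cache. */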
5904
5905
5906/////////////////////////////////////////////////////////
5907// //
sewardj23f12002009-07-24 08:45:08 +00005908// Front-filtering accesses //
5909// //
5910/////////////////////////////////////////////////////////
5911
5912static UWord stats__f_ac = 0;
5913static UWord stats__f_sk = 0;
5914
5915#if 0
5916# define STATS__F_SHOW \
5917 do { \
5918 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5919 VG_(printf)("filters: ac %lu sk %lu\n", \
5920 stats__f_ac, stats__f_sk); \
5921 } while (0)
5922#else
5923# define STATS__F_SHOW /* */
5924#endif
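
/* The zsm_sapply*_f__msm* functions below all follow one pattern:
   ask the per-thread filter whether this access can be proven
   redundant (the same thread touched the same bytes recently, with
   no intervening synchronisation or context switch), and only fall
   through to the full state-machine appliers when it cannot.
   Sketch of the shape, for a hypothetical access width NN:

      if (LIKELY(Filter__ok_to_skip_cwrNN(thr->filter, a)))
         return;                        // filter hit: skip the check
      zsm_sapplyNN__msmcwrite(thr, a);  // filter miss: do it for real
*/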
5925
5926void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5927 stats__f_ac++;
5928 STATS__F_SHOW;
5929 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5930 stats__f_sk++;
5931 return;
5932 }
5933 zsm_sapply08__msmcwrite(thr, a);
5934}
5935
5936void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5937 stats__f_ac++;
5938 STATS__F_SHOW;
5939 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5940 stats__f_sk++;
5941 return;
5942 }
5943 zsm_sapply16__msmcwrite(thr, a);
5944}
5945
5946void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5947 stats__f_ac++;
5948 STATS__F_SHOW;
5949 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5950 stats__f_sk++;
5951 return;
5952 }
5953 zsm_sapply32__msmcwrite(thr, a);
5954}
5955
5956void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5957 stats__f_ac++;
5958 STATS__F_SHOW;
5959 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5960 stats__f_sk++;
5961 return;
5962 }
5963 zsm_sapply64__msmcwrite(thr, a);
5964}
5965
5966void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5967{
5968 /* fast track a couple of common cases */
5969 if (len == 4 && aligned32(a)) {
5970 zsm_sapply32_f__msmcwrite( thr, a );
5971 return;
5972 }
5973 if (len == 8 && aligned64(a)) {
5974 zsm_sapply64_f__msmcwrite( thr, a );
5975 return;
5976 }
5977
5978 /* be completely general (but as efficient as possible) */
5979 if (len == 0) return;
5980
5981 if (!aligned16(a) && len >= 1) {
5982 zsm_sapply08_f__msmcwrite( thr, a );
5983 a += 1;
5984 len -= 1;
5985 tl_assert(aligned16(a));
5986 }
5987 if (len == 0) return;
5988
5989 if (!aligned32(a) && len >= 2) {
5990 zsm_sapply16_f__msmcwrite( thr, a );
5991 a += 2;
5992 len -= 2;
5993 tl_assert(aligned32(a));
5994 }
5995 if (len == 0) return;
5996
5997 if (!aligned64(a) && len >= 4) {
5998 zsm_sapply32_f__msmcwrite( thr, a );
5999 a += 4;
6000 len -= 4;
6001 tl_assert(aligned64(a));
6002 }
6003 if (len == 0) return;
6004
6005 if (len >= 8) {
6006 tl_assert(aligned64(a));
6007 while (len >= 8) {
6008 zsm_sapply64_f__msmcwrite( thr, a );
6009 a += 8;
6010 len -= 8;
6011 }
6012 tl_assert(aligned64(a));
6013 }
6014 if (len == 0) return;
6015
6016 if (len >= 4)
6017 tl_assert(aligned32(a));
6018 if (len >= 4) {
6019 zsm_sapply32_f__msmcwrite( thr, a );
6020 a += 4;
6021 len -= 4;
6022 }
6023 if (len == 0) return;
6024
6025 if (len >= 2)
6026 tl_assert(aligned16(a));
6027 if (len >= 2) {
6028 zsm_sapply16_f__msmcwrite( thr, a );
6029 a += 2;
6030 len -= 2;
6031 }
6032 if (len == 0) return;
6033
6034 if (len >= 1) {
6035 zsm_sapply08_f__msmcwrite( thr, a );
6036 //a += 1;
6037 len -= 1;
6038 }
6039 tl_assert(len == 0);
6040}
6041
6042void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
6043 stats__f_ac++;
6044 STATS__F_SHOW;
6045 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
6046 stats__f_sk++;
6047 return;
6048 }
6049 zsm_sapply08__msmcread(thr, a);
6050}
6051
6052void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
6053 stats__f_ac++;
6054 STATS__F_SHOW;
6055 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
6056 stats__f_sk++;
6057 return;
6058 }
6059 zsm_sapply16__msmcread(thr, a);
6060}
6061
6062void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
6063 stats__f_ac++;
6064 STATS__F_SHOW;
6065 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
6066 stats__f_sk++;
6067 return;
6068 }
6069 zsm_sapply32__msmcread(thr, a);
6070}
6071
6072void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
6073 stats__f_ac++;
6074 STATS__F_SHOW;
6075 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
6076 stats__f_sk++;
6077 return;
6078 }
6079 zsm_sapply64__msmcread(thr, a);
6080}
6081
6082void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
6083{
6084 /* fast track a couple of common cases */
6085 if (len == 4 && aligned32(a)) {
6086 zsm_sapply32_f__msmcread( thr, a );
6087 return;
6088 }
6089 if (len == 8 && aligned64(a)) {
6090 zsm_sapply64_f__msmcread( thr, a );
6091 return;
6092 }
6093
6094 /* be completely general (but as efficient as possible) */
6095 if (len == 0) return;
6096
6097 if (!aligned16(a) && len >= 1) {
6098 zsm_sapply08_f__msmcread( thr, a );
6099 a += 1;
6100 len -= 1;
6101 tl_assert(aligned16(a));
6102 }
6103 if (len == 0) return;
6104
6105 if (!aligned32(a) && len >= 2) {
6106 zsm_sapply16_f__msmcread( thr, a );
6107 a += 2;
6108 len -= 2;
6109 tl_assert(aligned32(a));
6110 }
6111 if (len == 0) return;
6112
6113 if (!aligned64(a) && len >= 4) {
6114 zsm_sapply32_f__msmcread( thr, a );
6115 a += 4;
6116 len -= 4;
6117 tl_assert(aligned64(a));
6118 }
6119 if (len == 0) return;
6120
6121 if (len >= 8) {
6122 tl_assert(aligned64(a));
6123 while (len >= 8) {
6124 zsm_sapply64_f__msmcread( thr, a );
6125 a += 8;
6126 len -= 8;
6127 }
6128 tl_assert(aligned64(a));
6129 }
6130 if (len == 0) return;
6131
6132 if (len >= 4)
6133 tl_assert(aligned32(a));
6134 if (len >= 4) {
6135 zsm_sapply32_f__msmcread( thr, a );
6136 a += 4;
6137 len -= 4;
6138 }
6139 if (len == 0) return;
6140
6141 if (len >= 2)
6142 tl_assert(aligned16(a));
6143 if (len >= 2) {
6144 zsm_sapply16_f__msmcread( thr, a );
6145 a += 2;
6146 len -= 2;
6147 }
6148 if (len == 0) return;
6149
6150 if (len >= 1) {
6151 zsm_sapply08_f__msmcread( thr, a );
6152 //a += 1;
6153 len -= 1;
6154 }
6155 tl_assert(len == 0);
6156}
6157
6158void libhb_Thr_resumes ( Thr* thr )
6159{
6160 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006161 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006162 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00006163 Filter__clear(thr->filter, "libhb_Thr_resumes");
6164 /* A kludge, but .. if this thread doesn't have any marker stacks
6165 at all, get one right now. This is easier than figuring out
6166 exactly when at thread startup we can and can't take a stack
6167 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00006168 if (HG_(clo_history_level) == 1) {
6169 tl_assert(thr->local_Kws_n_stacks);
6170 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6171 note_local_Kw_n_stack_for(thr);
6172 }
sewardj23f12002009-07-24 08:45:08 +00006173}
6174
6175
6176/////////////////////////////////////////////////////////
6177// //
sewardjf98e1c02008-10-25 16:22:41 +00006178// Synchronisation objects //
6179// //
6180/////////////////////////////////////////////////////////
6181
sewardjffce8152011-06-24 10:09:41 +00006182/* A doubly-linked list of all the SOs. */
6183SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006184
sewardjffce8152011-06-24 10:09:41 +00006185static SO* SO__Alloc ( void )
6186{
sewardjf98e1c02008-10-25 16:22:41 +00006187 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6188 so->viR = VtsID_INVALID;
6189 so->viW = VtsID_INVALID;
6190 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006191 /* Add to double linked list */
6192 if (admin_SO) {
6193 tl_assert(admin_SO->admin_prev == NULL);
6194 admin_SO->admin_prev = so;
6195 so->admin_next = admin_SO;
6196 } else {
6197 so->admin_next = NULL;
6198 }
6199 so->admin_prev = NULL;
6200 admin_SO = so;
6201 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006202 return so;
6203}
sewardjffce8152011-06-24 10:09:41 +00006204
6205static void SO__Dealloc ( SO* so )
6206{
sewardjf98e1c02008-10-25 16:22:41 +00006207 tl_assert(so);
6208 tl_assert(so->magic == SO_MAGIC);
6209 if (so->viR == VtsID_INVALID) {
6210 tl_assert(so->viW == VtsID_INVALID);
6211 } else {
6212 tl_assert(so->viW != VtsID_INVALID);
6213 VtsID__rcdec(so->viR);
6214 VtsID__rcdec(so->viW);
6215 }
6216 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006217   /* Delete from the doubly-linked list */
6218 if (so->admin_prev)
6219 so->admin_prev->admin_next = so->admin_next;
6220 if (so->admin_next)
6221 so->admin_next->admin_prev = so->admin_prev;
6222 if (so == admin_SO)
6223 admin_SO = so->admin_next;
6224 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006225 HG_(free)( so );
6226}
6227
6228
6229/////////////////////////////////////////////////////////
6230// //
6231// Top Level API //
6232// //
6233/////////////////////////////////////////////////////////
6234
florian6bd9dc12012-11-23 16:17:43 +00006235static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006236{
6237 if (1) return;
6238 if (t->viR == t->viW) {
6239 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6240 VtsID__pp( t->viR );
6241 VG_(printf)("%s","\n");
6242 } else {
6243 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6244 VtsID__pp( t->viR );
6245 VG_(printf)(" viW %u==", t->viW);
6246 VtsID__pp( t->viW );
6247 VG_(printf)("%s","\n");
6248 }
6249}
6250
6251
6252Thr* libhb_init (
6253 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006254 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006255 )
6256{
6257 Thr* thr;
6258 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006259
 6260   // We will have to store a large number of these,
6261 // so make sure they're the size we expect them to be.
6262 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006263
6264 /* because first 1024 unusable */
6265 tl_assert(SCALARTS_N_THRBITS >= 11);
6266 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6267 Thr_n_RCEC). */
6268 tl_assert(SCALARTS_N_THRBITS <= 29);
6269
6270 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6271 (32-bit). It's not correctness-critical, but there are a lot of
6272 them, so it's important from a space viewpoint. Unfortunately
6273 we simply can't pack it into 2 words on a 32-bit target. */
6274 if (sizeof(UWord) == 8) {
6275 tl_assert(sizeof(Thr_n_RCEC) == 16);
6276 } else {
6277 tl_assert(sizeof(Thr_n_RCEC) == 12);
6278 }
6279
6280 /* Word sets really are 32 bits. Even on a 64 bit target. */
6281 tl_assert(sizeof(WordSetID) == 4);
6282 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006283
sewardjf98e1c02008-10-25 16:22:41 +00006284 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006285 tl_assert(get_EC);
6286 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006287 main_get_EC = get_EC;
6288
6289 // No need to initialise hg_wordfm.
6290 // No need to initialise hg_wordset.
6291
sewardj7aa38a92011-02-27 23:04:12 +00006292 /* Allocated once and never deallocated. Used as a temporary in
6293 VTS singleton, tick and join operations. */
6294 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6295 temp_max_sized_VTS->id = VtsID_INVALID;
philippec3508652015-03-28 12:01:58 +00006296 verydead_thread_tables_init();
sewardjf98e1c02008-10-25 16:22:41 +00006297 vts_set_init();
6298 vts_tab_init();
6299 event_map_init();
6300 VtsID__invalidate_caches();
6301
6302 // initialise shadow memory
philippe1475a7f2015-05-11 19:45:08 +00006303 zsm_init( );
sewardjf98e1c02008-10-25 16:22:41 +00006304
6305 thr = Thr__new();
6306 vi = VtsID__mk_Singleton( thr, 1 );
6307 thr->viR = vi;
6308 thr->viW = vi;
6309 VtsID__rcinc(thr->viR);
6310 VtsID__rcinc(thr->viW);
6311
6312 show_thread_state(" root", thr);
6313 return thr;
6314}
6315
sewardj23f12002009-07-24 08:45:08 +00006316
sewardjf98e1c02008-10-25 16:22:41 +00006317Thr* libhb_create ( Thr* parent )
6318{
6319 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6320 the child's index. Since the child's index is guaranteed
6321 unique, it has never been seen before, so the implicit value
6322 before the tick is zero and after that is one. */
6323 Thr* child = Thr__new();
6324
6325 child->viR = VtsID__tick( parent->viR, child );
6326 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006327 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006328 VtsID__rcinc(child->viR);
6329 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006330 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006331 early for that - it may not have a valid TId yet. So, let
6332 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006333
6334 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6335 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6336
6337 /* and the parent has to move along too */
6338 VtsID__rcdec(parent->viR);
6339 VtsID__rcdec(parent->viW);
6340 parent->viR = VtsID__tick( parent->viR, parent );
6341 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006342 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006343 VtsID__rcinc(parent->viR);
6344 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006345 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006346
6347 show_thread_state(" child", child);
6348 show_thread_state("parent", parent);
6349
6350 return child;
6351}
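
/* Worked example (illustrative): if the parent's clocks are the
   singleton VTS [P:5] at the point of the create, then afterwards
      child->viR  = child->viW  = [P:5, C:1]
      parent->viR = parent->viW = [P:6]
   so everything the parent did before the create happens-before
   everything the child does, while the parent's subsequent accesses
   stay unordered w.r.t. the child's. */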
6352
6353/* Shut down the library, and print stats (in fact that's _all_
 6354   this is for). */
6355void libhb_shutdown ( Bool show_stats )
6356{
6357 if (show_stats) {
6358 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6359 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6360 stats__secmaps_allocd,
6361 stats__secmap_ga_space_covered);
6362 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6363 stats__secmap_linesZ_allocd,
6364 stats__secmap_linesZ_bytes);
philippe0fb30ac2015-05-15 13:17:17 +00006365 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)"
6366 " (%'10lu used)\n",
6367 stats__secmap_linesF_allocd, stats__secmap_linesF_bytes,
6368 shmem__SecMap_used_linesF());
philippef54cb662015-05-10 22:19:31 +00006369 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6370 " #%lu scanGC \n",
6371 stats__secmaps_in_map_shmem,
6372 shmem__SecMap_do_GC(False /* really do GC */),
6373 stats__secmaps_scanGC);
6374 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6375 VG_(printf)(" secmaps: %'10lu in freelist,"
6376 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6377 SecMap_freelist_length(),
6378 stats__secmaps_scanGCed,
6379 stats__secmaps_ssetGCed);
sewardjf98e1c02008-10-25 16:22:41 +00006380 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6381 stats__secmaps_search, stats__secmaps_search_slow);
6382
6383 VG_(printf)("%s","\n");
6384 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6385 stats__cache_totrefs, stats__cache_totmisses );
6386 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6387 stats__cache_Z_fetches, stats__cache_F_fetches );
6388 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6389 stats__cache_Z_wbacks, stats__cache_F_wbacks );
philippef54cb662015-05-10 22:19:31 +00006390 VG_(printf)(" cache: %'14lu flushes_invals\n",
6391 stats__cache_flushes_invals );
sewardjf98e1c02008-10-25 16:22:41 +00006392 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6393 stats__cache_make_New_arange,
6394 stats__cache_make_New_inZrep);
6395
6396 VG_(printf)("%s","\n");
6397 VG_(printf)(" cline: %'10lu normalises\n",
6398 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006399 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6400 stats__cline_cread64s,
6401 stats__cline_cread32s,
6402 stats__cline_cread16s,
6403 stats__cline_cread08s );
6404 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6405 stats__cline_cwrite64s,
6406 stats__cline_cwrite32s,
6407 stats__cline_cwrite16s,
6408 stats__cline_cwrite08s );
6409 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6410 stats__cline_swrite64s,
6411 stats__cline_swrite32s,
6412 stats__cline_swrite16s,
6413 stats__cline_swrite08s );
6414 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6415 stats__cline_sread08s, stats__cline_scopy08s );
philippef54cb662015-05-10 22:19:31 +00006416 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6417 " 2to1 %'12lu\n",
6418 stats__cline_64to32splits, stats__cline_32to16splits,
6419 stats__cline_16to8splits );
6420 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6421 " 2to1 %'12lu\n",
6422 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6423 stats__cline_16to8pulldown );
sewardjf98e1c02008-10-25 16:22:41 +00006424 if (0)
philippef54cb662015-05-10 22:19:31 +00006425 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6426 " covers %ld bytes of arange\n",
6427 (Word)sizeof(LineZ),
6428 (Word)N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00006429
6430 VG_(printf)("%s","\n");
6431
sewardjc8028ad2010-05-05 09:34:42 +00006432 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006433 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006434 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006435 stats__msmcwrite, stats__msmcwrite_change);
6436 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6437 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006438 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6439 stats__join2_queries, stats__join2_misses);
6440
6441 VG_(printf)("%s","\n");
philippef54cb662015-05-10 22:19:31 +00006442 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6443 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6444 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6445 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6446 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6447 " (%'lu allocd)\n",
sewardj7aa38a92011-02-27 23:04:12 +00006448 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006449 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6450 stats__vts__indexat_slow );
6451
6452 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006453 VG_(printf)(
6454 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6455 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6456 );
philippe2bd23262015-05-11 20:56:49 +00006457 VG_(printf)(" libhb: #%lu vts_tab GC #%lu vts pruning\n",
6458 stats__vts_tab_GC, stats__vts_pruning);
sewardjf98e1c02008-10-25 16:22:41 +00006459 VG_(printf)( " libhb: %lu entries in vts_set\n",
6460 VG_(sizeFM)( vts_set ) );
6461
6462 VG_(printf)("%s","\n");
philippe900c5352015-03-24 14:02:44 +00006463 {
6464 UInt live = 0;
6465 UInt llexit_done = 0;
6466 UInt joinedwith_done = 0;
6467 UInt llexit_and_joinedwith_done = 0;
6468
6469 Thread* hgthread = get_admin_threads();
6470 tl_assert(hgthread);
6471 while (hgthread) {
6472 Thr* hbthr = hgthread->hbthr;
6473 tl_assert(hbthr);
6474 if (hbthr->llexit_done && hbthr->joinedwith_done)
6475 llexit_and_joinedwith_done++;
6476 else if (hbthr->llexit_done)
6477 llexit_done++;
6478 else if (hbthr->joinedwith_done)
6479 joinedwith_done++;
6480 else
6481 live++;
6482 hgthread = hgthread->admin;
6483 }
6484 VG_(printf)(" libhb: threads live: %d exit_and_joinedwith %d"
6485 " exit %d joinedwith %d\n",
6486 live, llexit_and_joinedwith_done,
6487 llexit_done, joinedwith_done);
philippec3508652015-03-28 12:01:58 +00006488 VG_(printf)(" libhb: %d verydead_threads, "
6489 "%d verydead_threads_not_pruned\n",
6490 (int) VG_(sizeXA)( verydead_thread_table),
6491 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6492 tl_assert (VG_(sizeXA)( verydead_thread_table)
6493 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6494 == llexit_and_joinedwith_done);
philippe900c5352015-03-24 14:02:44 +00006495 }
6496
6497 VG_(printf)("%s","\n");
philipped005b2c2015-04-21 21:58:14 +00006498 {
6499 UWord OldRef_accs_n[N_OLDREF_ACCS+1];
6500 UInt accs_n;
6501 UWord OldRef_n;
6502 UInt i;
6503
6504 OldRef_n = 0;
6505 for (i = 0; i <= N_OLDREF_ACCS; i++)
6506 OldRef_accs_n[i] = 0;
6507
6508 for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
6509 OldRef_n++;
6510 accs_n = 0;
6511 for (i = 0; i < N_OLDREF_ACCS; i++) {
6512 if (o->accs[i].thrid != 0)
6513 accs_n++;
6514 }
6515 OldRef_accs_n[accs_n]++;
6516 }
6517
6518 tl_assert(OldRef_n == oldrefTreeN);
6519 VG_(printf)( " libhb: oldrefTreeN %lu ", oldrefTreeN);
6520 VG_(printf)( "( ");
6521 for (i = 0; i <= N_OLDREF_ACCS; i++)
6522 VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
6523 VG_(printf)( ")\n");
6524 }
sewardjf98e1c02008-10-25 16:22:41 +00006525 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6526 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6527 stats__ctxt_rcdec2,
6528 stats__ctxt_rcdec3 );
6529 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6530 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
philippecabdbb52015-04-20 21:33:16 +00006531 VG_(printf)( " libhb: contextTab: %lu slots,"
6532 " %lu cur ents(ref'd %lu),"
philippe06bc23a2015-04-17 21:19:43 +00006533 " %lu max ents\n",
sewardjf98e1c02008-10-25 16:22:41 +00006534 (UWord)N_RCEC_TAB,
philippecabdbb52015-04-20 21:33:16 +00006535 stats__ctxt_tab_curr, RCEC_referenced,
6536 stats__ctxt_tab_max );
philippe47124e92015-04-25 14:00:24 +00006537 {
6538# define MAXCHAIN 10
6539 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6540 UInt non0chain = 0;
6541 UInt n;
6542 UInt i;
6543 RCEC *p;
6544
6545 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6546 for (i = 0; i < N_RCEC_TAB; i++) {
6547 n = 0;
6548 for (p = contextTab[i]; p; p = p->next)
6549 n++;
6550 if (n < MAXCHAIN)
6551 chains[n]++;
6552 else
6553 chains[MAXCHAIN]++;
6554 if (n > 0)
6555 non0chain++;
6556 }
6557 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6558 " Avg chain len %3.1f\n"
6559 " ",
6560 (Double)stats__ctxt_tab_curr
6561 / (Double)(non0chain ? non0chain : 1));
6562 for (i = 0; i <= MAXCHAIN; i++) {
6563 if (chains[i] != 0)
6564 VG_(printf)( "[%d%s]=%d ",
6565 i, i == MAXCHAIN ? "+" : "",
6566 chains[i]);
6567 }
6568 VG_(printf)( "\n");
6569# undef MAXCHAIN
6570 }
sewardjf98e1c02008-10-25 16:22:41 +00006571 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6572 stats__ctxt_tab_qs,
6573 stats__ctxt_tab_cmps );
6574#if 0
6575 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6576 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6577 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6578 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6579 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6580 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6581 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6582 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6583 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6584 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6585 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6586 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6587 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6588 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6589
6590 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6591 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6592 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6593 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6594#endif
6595
6596 VG_(printf)("%s","<<< END libhb stats >>>\n");
6597 VG_(printf)("%s","\n");
6598
6599 }
6600}
6601
sewardjffce8152011-06-24 10:09:41 +00006602/* Receive notification that a thread has low level exited. The
6603 significance here is that we do not expect to see any more memory
6604 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006605void libhb_async_exit ( Thr* thr )
6606{
sewardj23f12002009-07-24 08:45:08 +00006607 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006608 tl_assert(!thr->llexit_done);
6609 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006610
6611 /* free up Filter and local_Kws_n_stacks (well, actually not the
6612 latter ..) */
6613 tl_assert(thr->filter);
6614 HG_(free)(thr->filter);
6615 thr->filter = NULL;
6616
sewardjffce8152011-06-24 10:09:41 +00006617 /* Tell the VTS mechanism this thread has exited, so it can
6618 participate in VTS pruning. Note this can only happen if the
6619 thread has both ll_exited and has been joined with. */
6620 if (thr->joinedwith_done)
6621 VTS__declare_thread_very_dead(thr);
6622
sewardj2d2ea2f2009-08-02 10:15:07 +00006623 /* Another space-accuracy tradeoff. Do we want to be able to show
6624 H1 history for conflicts in threads which have since exited? If
6625 yes, then we better not free up thr->local_Kws_n_stacks. The
6626 downside is a potential per-thread leak of up to
6627 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6628 XArray average overcommit factor is (1.5 I'd guess). */
6629 // hence:
6630 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6631 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006632}
6633
sewardjffce8152011-06-24 10:09:41 +00006634/* Receive notification that a thread has been joined with. The
6635 significance here is that we do not expect to see any further
6636 references to its vector clocks (Thr::viR and Thr::viW). */
6637void libhb_joinedwith_done ( Thr* thr )
6638{
6639 tl_assert(thr);
6640 /* Caller must ensure that this is only ever called once per Thr. */
6641 tl_assert(!thr->joinedwith_done);
6642 thr->joinedwith_done = True;
6643 if (thr->llexit_done)
6644 VTS__declare_thread_very_dead(thr);
6645}
6646
6647
sewardjf98e1c02008-10-25 16:22:41 +00006648/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6649 a Seg that points at a VTS is its one-and-only owner, and ditto for
6650 a SO that points at a VTS. */
6651
6652SO* libhb_so_alloc ( void )
6653{
6654 return SO__Alloc();
6655}
6656
6657void libhb_so_dealloc ( SO* so )
6658{
6659 tl_assert(so);
6660 tl_assert(so->magic == SO_MAGIC);
6661 SO__Dealloc(so);
6662}
6663
6664/* See comments in libhb.h for details on the meaning of
6665 strong vs weak sends and strong vs weak receives. */
6666void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6667{
6668 /* Copy the VTSs from 'thr' into the sync object, and then move
6669 the thread along one step. */
6670
6671 tl_assert(so);
6672 tl_assert(so->magic == SO_MAGIC);
6673
6674 /* stay sane .. a thread's read-clock must always lead or be the
6675 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006676 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6677 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006678 }
6679
6680 /* since we're overwriting the VtsIDs in the SO, we need to drop
6681 any references made by the previous contents thereof */
6682 if (so->viR == VtsID_INVALID) {
6683 tl_assert(so->viW == VtsID_INVALID);
6684 so->viR = thr->viR;
6685 so->viW = thr->viW;
6686 VtsID__rcinc(so->viR);
6687 VtsID__rcinc(so->viW);
6688 } else {
6689 /* In a strong send, we dump any previous VC in the SO and
6690 install the sending thread's VC instead. For a weak send we
6691 must join2 with what's already there. */
6692 tl_assert(so->viW != VtsID_INVALID);
6693 VtsID__rcdec(so->viR);
6694 VtsID__rcdec(so->viW);
6695 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6696 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6697 VtsID__rcinc(so->viR);
6698 VtsID__rcinc(so->viW);
6699 }
6700
6701 /* move both parent clocks along */
6702 VtsID__rcdec(thr->viR);
6703 VtsID__rcdec(thr->viW);
6704 thr->viR = VtsID__tick( thr->viR, thr );
6705 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006706 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006707 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006708 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006709 }
sewardjf98e1c02008-10-25 16:22:41 +00006710 VtsID__rcinc(thr->viR);
6711 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006712
sewardjf98e1c02008-10-25 16:22:41 +00006713 if (strong_send)
6714 show_thread_state("s-send", thr);
6715 else
6716 show_thread_state("w-send", thr);
6717}
6718
6719void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6720{
6721 tl_assert(so);
6722 tl_assert(so->magic == SO_MAGIC);
6723
6724 if (so->viR != VtsID_INVALID) {
6725 tl_assert(so->viW != VtsID_INVALID);
6726
6727 /* Weak receive (basically, an R-acquisition of a R-W lock).
6728 This advances the read-clock of the receiver, but not the
6729 write-clock. */
6730 VtsID__rcdec(thr->viR);
6731 thr->viR = VtsID__join2( thr->viR, so->viR );
6732 VtsID__rcinc(thr->viR);
6733
sewardj90eb22e2009-07-28 20:22:18 +00006734 /* At one point (r10589) it seemed safest to tick the clocks for
6735 the receiving thread after the join. But on reflection, I
6736 wonder if that might cause it to 'overtake' constraints,
6737 which could lead to missing races. So, back out that part of
6738 r10589. */
6739 //VtsID__rcdec(thr->viR);
6740 //thr->viR = VtsID__tick( thr->viR, thr );
6741 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006742
sewardjf98e1c02008-10-25 16:22:41 +00006743 /* For a strong receive, we also advance the receiver's write
6744 clock, which means the receive as a whole is essentially
6745 equivalent to a W-acquisition of a R-W lock. */
6746 if (strong_recv) {
6747 VtsID__rcdec(thr->viW);
6748 thr->viW = VtsID__join2( thr->viW, so->viW );
6749 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006750
sewardj90eb22e2009-07-28 20:22:18 +00006751 /* See comment just above, re r10589. */
6752 //VtsID__rcdec(thr->viW);
6753 //thr->viW = VtsID__tick( thr->viW, thr );
6754 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006755 }
6756
sewardjf4845dc2010-05-28 20:09:59 +00006757 if (thr->filter)
6758 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006759 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006760
sewardjf98e1c02008-10-25 16:22:41 +00006761 if (strong_recv)
6762 show_thread_state("s-recv", thr);
6763 else
6764 show_thread_state("w-recv", thr);
6765
6766 } else {
6767 tl_assert(so->viW == VtsID_INVALID);
6768 /* Deal with degenerate case: 'so' has no vts, so there has been
6769 no message posted to it. Just ignore this case. */
6770 show_thread_state("d-recv", thr);
6771 }
6772}
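
/* Illustrative sketch (not part of the library): how a client such
   as Helgrind can use an SO to model the happens-before edge of an
   unlock/lock pair.  thrA and thrB are hypothetical Thr*s. */
#if 0
{
   SO* so = libhb_so_alloc();
   /* thread A, at the unlock: */
   libhb_so_send( thrA, so, True/*strong send*/ );
   /* thread B, at the subsequent lock: */
   libhb_so_recv( thrB, so, True/*strong recv*/ );
   /* everything A did before the send now happens-before everything
      B does after the recv */
   libhb_so_dealloc( so );
}
#endif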
6773
6774Bool libhb_so_everSent ( SO* so )
6775{
6776 if (so->viR == VtsID_INVALID) {
6777 tl_assert(so->viW == VtsID_INVALID);
6778 return False;
6779 } else {
6780 tl_assert(so->viW != VtsID_INVALID);
6781 return True;
6782 }
6783}
6784
6785#define XXX1 0 // 0x67a106c
6786#define XXX2 0
6787
sewardj23f12002009-07-24 08:45:08 +00006788static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006789 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6790 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6791 return False;
6792}
florian0c8a47c2013-10-01 20:10:21 +00006793static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006794{
sewardj23f12002009-07-24 08:45:08 +00006795 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006796 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6797 show_thread_state("", thr);
6798 VG_(printf)("%s","\n");
6799}
6800
sewardj23f12002009-07-24 08:45:08 +00006801void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006802{
6803 SVal sv = SVal__mkC(thr->viW, thr->viW);
6804 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006805 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6806 zsm_sset_range( a, szB, sv );
6807 Filter__clear_range( thr->filter, a, szB );
6808 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006809}
6810
sewardjfd35d492011-03-17 19:39:55 +00006811void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006812{
sewardj23f12002009-07-24 08:45:08 +00006813 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006814}
6815
philippef54cb662015-05-10 22:19:31 +00006816
6817/* Set the lines zix_start till zix_end to NOACCESS. */
6818static void zsm_secmap_line_range_noaccess (SecMap *sm,
6819 UInt zix_start, UInt zix_end)
6820{
6821 for (UInt lz = zix_start; lz <= zix_end; lz++) {
6822 LineZ* lineZ;
6823 LineF* lineF;
6824 lineZ = &sm->linesZ[lz];
6825 if (lineZ->dict[0] != SVal_INVALID) {
6826 rcdec_LineZ(lineZ);
6827 } else {
6828 UInt fix = (UInt)lineZ->dict[1];
6829 tl_assert(sm->linesF);
6830 tl_assert(sm->linesF_size > 0);
6831 tl_assert(fix >= 0 && fix < sm->linesF_size);
6832 lineF = &sm->linesF[fix];
6833 rcdec_LineF(lineF);
6834 lineF->inUse = False;
6835 }
6836 lineZ->dict[0] = SVal_NOACCESS;
6837 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6838 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6839 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6840 }
6841}
6842
6843/* Set the given range to SVal_NOACCESS in-place in the secmap.
6844 a must be cacheline aligned. len must be a multiple of a cacheline
6845 and must be < N_SECMAP_ARANGE. */
6846static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6847{
6848 tl_assert (is_valid_scache_tag (a));
6849 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6850 tl_assert (len < N_SECMAP_ARANGE);
6851
6852 SecMap *sm1 = shmem__find_SecMap (a);
6853 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6854 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
6855 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6856
6857 if (sm1) {
6858 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6859 zsm_secmap_line_range_noaccess (sm1, zix_start,
6860 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6861 }
6862 if (sm2 && sm1 != sm2) {
6863 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6864 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6865 }
6866}
6867
6868/* Set the given address range to SVal_NOACCESS.
6869 The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
6870static void zsm_sset_range_noaccess (Addr addr, SizeT len)
6871{
6872 /*
6873 BPC = Before, Partial Cacheline, = addr
6874 (i.e. starting inside a cacheline/inside a SecMap)
6875 BFC = Before, Full Cacheline(s), but not full SecMap
6876 (i.e. starting inside a SecMap)
6877 FSM = Full SecMap(s)
6878 (i.e. starting a SecMap)
6879 AFC = After, Full Cacheline(s), but not full SecMap
6880 (i.e. first address after the full SecMap(s))
6881 APC = After, Partial Cacheline, i.e. first address after the
6882 full CacheLines).
6883 ARE = After Range End = addr+len = first address not part of the range.
6884
6885 If addr starts a Cacheline, then BPC == BFC.
6886 If addr starts a SecMap, then BPC == BFC == FSM.
6887 If addr+len starts a SecMap, then APC == ARE == AFC
6888 If addr+len starts a Cacheline, then APC == ARE
6889 */
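   /* Worked example (illustrative; assumes N_LINE_ARANGE == 64 and
      N_SECMAP_ARANGE == 0x2000 for concreteness): for addr = 0x1FF0
      and len = 0x4100, ARE = 0x60F0, and the decomposition is
         BPC = 0x1FF0, BPClen = 0x10    (partial line, via the cache)
         BFC = 0x2000, BFClen = 0       (0x2000 already starts a SecMap)
         FSM = 0x2000, FSMlen = 0x4000  (two whole SecMaps, freed outright)
         AFC = 0x6000, AFClen = 0xC0    (whole lines, set in-place)
         APC = 0x60C0, APClen = 0x30    (partial line, via the cache)
      and the five lengths sum to len. */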
6890 Addr ARE = addr + len;
6891 Addr BPC = addr;
6892 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
6893 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
6894 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
6895 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
6896 SizeT Plen = len; // Plen will be split between the following:
6897 SizeT BPClen;
6898 SizeT BFClen;
6899 SizeT FSMlen;
6900 SizeT AFClen;
6901 SizeT APClen;
6902
6903 /* Consumes from Plen the nr of bytes between from and to.
6904 from and to must be aligned on a multiple of round.
6905 The length consumed will be a multiple of round, with
6906 a maximum of Plen. */
6907# define PlenCONSUME(from, to, round, consumed) \
6908 do { \
6909 if (from < to) { \
6910 if (to - from < Plen) \
6911 consumed = to - from; \
6912 else \
6913 consumed = ROUNDDN(Plen, round); \
6914 } else { \
6915 consumed = 0; \
6916 } \
6917 Plen -= consumed; } while (0)
6918
6919 PlenCONSUME(BPC, BFC, 1, BPClen);
6920 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
6921 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
6922 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
6923 PlenCONSUME(APC, ARE, 1, APClen);
6924
6925 if (0)
6926 VG_(printf) ("addr %p[%ld] ARE %p"
6927 " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
6928 " AFC %p[%ld] APC %p[%ld]\n",
6929 (void*)addr, len, (void*)ARE,
6930 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
6931 (void*)AFC, AFClen, (void*)APC, APClen);
6932
6933 tl_assert (Plen == 0);
6934
6935 /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
6936
6937 /* First we set the partial cachelines. This is done through the cache. */
6938 if (BPClen > 0)
6939 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
6940 if (APClen > 0)
6941 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
6942
6943 /* After this, we will not use the cache anymore. We will directly work
6944 in-place on the z shadow memory in SecMap(s).
6945 So, we invalidate the cachelines for the whole range we are setting
6946 to NOACCESS below. */
6947 shmem__invalidate_scache_range (BFC, APC - BFC);
6948
6949 if (BFClen > 0)
6950 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
6951 if (AFClen > 0)
6952 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
6953
6954 if (FSMlen > 0) {
6955 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
6956 free list. */
6957 Addr sm_start = FSM;
6958 while (sm_start < AFC) {
6959 SecMap *sm = shmem__find_SecMap (sm_start);
6960 if (sm) {
6961 Addr gaKey;
6962 SecMap *fm_sm;
6963
6964 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
6965 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
philippefc00a2a2015-05-15 11:41:54 +00006966 if (LIKELY(sm->linesZ[lz].dict[0] != SVal_INVALID))
philippef54cb662015-05-10 22:19:31 +00006967 rcdec_LineZ(&sm->linesZ[lz]);
6968 }
6969 for (UInt lf = 0; lf < sm->linesF_size; lf++) {
6970 if (sm->linesF[lf].inUse)
6971 rcdec_LineF (&sm->linesF[lf]);
6972 }
6973 if (sm->linesF_size > 0) {
6974 HG_(free)(sm->linesF);
6975 stats__secmap_linesF_allocd -= sm->linesF_size;
6976 stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
6977 }
6978 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
6979 tl_assert (0);
6980 stats__secmaps_in_map_shmem--;
6981 tl_assert (gaKey == sm_start);
6982 tl_assert (sm == fm_sm);
6983 stats__secmaps_ssetGCed++;
6984 push_SecMap_on_freelist (sm);
6985 }
6986 sm_start += N_SECMAP_ARANGE;
6987 }
6988 tl_assert (sm_start == AFC);
6989
6990 /* The above loop might have kept copies of freed SecMap in the smCache.
6991 => clear them. */
6992 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
6993 smCache[0].gaKey = 1;
6994 smCache[0].sm = NULL;
6995 }
6996 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
6997 smCache[1].gaKey = 1;
6998 smCache[1].sm = NULL;
6999 }
7000 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
7001 smCache[2].gaKey = 1;
7002 smCache[2].sm = NULL;
7003 }
7004 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
7005 }
7006}
7007
sewardjfd35d492011-03-17 19:39:55 +00007008void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
7009{
7010 /* This really does put the requested range in NoAccess. It's
7011 expensive though. */
7012 SVal sv = SVal_NOACCESS;
7013 tl_assert(is_sane_SVal_C(sv));
philippef54cb662015-05-10 22:19:31 +00007014 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7015 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7016 else
7017 zsm_sset_range_noaccess (a, szB);
sewardjfd35d492011-03-17 19:39:55 +00007018 Filter__clear_range( thr->filter, a, szB );
7019}
7020
philippef54cb662015-05-10 22:19:31 +00007021/* Works byte at a time. Can be optimised if needed. */
7022UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
7023{
7024 UWord anr = 0; // nr of bytes addressable.
7025
 7026   /* Get the accessibility of each byte. Take care not to
 7027      create a SecMap or LineZ when checking whether a byte is addressable.
 7028 
 7029      Note: this is used for client requests. Performance deemed not critical.
 7030      So for simplicity, we work byte by byte.
7031 Performance could be improved by working with full cachelines
7032 or with full SecMap, when reaching a cacheline or secmap boundary. */
7033 for (SizeT i = 0; i < len; i++) {
7034 SVal sv = SVal_INVALID;
7035 Addr b = a + i;
7036 Addr tag = b & ~(N_LINE_ARANGE - 1);
7037 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
7038 UWord cloff = get_cacheline_offset(b);
7039
7040 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
7041 and/or SecMap for non addressable bytes. */
7042 if (tag == cache_shmem.tags0[wix]) {
7043 CacheLine copy = cache_shmem.lyns0[wix];
7044 /* We work on a copy of the cacheline, as we do not want to
7045 record the client request as a real read.
7046 The below is somewhat similar to zsm_sapply08__msmcread but
7047 avoids side effects on the cache. */
7048 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
7049 UWord tno = get_treeno(b);
7050 UShort descr = copy.descrs[tno];
7051 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
7052 SVal* tree = &copy.svals[tno << 3];
7053 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
7054 }
7055 sv = copy.svals[cloff];
7056 } else {
7057 /* Byte not found in the cacheline. Search for a SecMap. */
7058 SecMap *sm = shmem__find_SecMap(b);
7059 LineZ *lineZ;
7060 if (sm == NULL)
7061 sv = SVal_NOACCESS;
7062 else {
7063 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
7064 lineZ = &sm->linesZ[zix];
7065 if (lineZ->dict[0] == SVal_INVALID) {
7066 UInt fix = (UInt)lineZ->dict[1];
7067 sv = sm->linesF[fix].w64s[cloff];
7068 } else {
7069 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
7070 sv = lineZ->dict[ix];
7071 }
7072 }
7073 }
7074
7075 tl_assert (sv != SVal_INVALID);
7076 if (sv == SVal_NOACCESS) {
7077 if (abits)
7078 abits[i] = 0x00;
7079 } else {
7080 if (abits)
7081 abits[i] = 0xff;
7082 anr++;
7083 }
7084 }
7085
7086 return anr;
7087}
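
/* Illustrative sketch (not part of the library): querying the
   addressability of a buffer.  'ga' and 'nBytes' are hypothetical. */
#if 0
{
   UChar abits[nBytes];
   UWord n_addressable = libhb_srange_get_abits( ga, abits, nBytes );
   /* abits[i] is now 0xFF if ga+i is addressable and 0x00 otherwise;
      n_addressable is the number of 0xFF entries. */
}
#endif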
7088
7089
sewardj406bac82010-03-03 23:03:40 +00007090void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
7091{
7092 SVal sv = SVal_NOACCESS;
7093 tl_assert(is_sane_SVal_C(sv));
7094 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
philippef54cb662015-05-10 22:19:31 +00007095 if (LIKELY(szB < 2 * N_LINE_ARANGE))
7096 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
7097 else
7098 zsm_sset_range_noaccess (a, szB);
sewardj406bac82010-03-03 23:03:40 +00007099 Filter__clear_range( thr->filter, a, szB );
7100 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
7101}
7102
sewardj0b20a152011-03-10 21:34:21 +00007103Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00007104 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00007105 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007106}
7107
sewardj0b20a152011-03-10 21:34:21 +00007108void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00007109 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00007110 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00007111}
7112
sewardj23f12002009-07-24 08:45:08 +00007113void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00007114{
sewardj23f12002009-07-24 08:45:08 +00007115 zsm_scopy_range(src, dst, len);
7116 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00007117}
7118
7119void libhb_maybe_GC ( void )
7120{
philippecabdbb52015-04-20 21:33:16 +00007121 /* GC the unreferenced (zero rc) RCECs when
philippee0829e02015-04-21 20:55:40 +00007122 (1) reaching a significant nr of RCECs (to avoid scanning a contextTab
 7123      with mostly NULL ptrs)
7124 and (2) approaching the max nr of RCEC (as we have in any case
7125 at least that amount of RCEC in the pool allocator)
 7126      Note: the margin allows us to avoid a small but constant increase
 7127      of the max nr of RCECs, due to the fact that libhb_maybe_GC is
 7128      not called when the current nr of RCECs exactly reaches the max.
 7129      and (3) the nr of referenced RCECs is less than 75% of the total nr of RCECs.
 7130      Avoiding excessive growth in the nr of RCECs keeps memory use low
 7131      and avoids having too many elements in the (fixed) contextTab hashtable.
7132 */
philippecabdbb52015-04-20 21:33:16 +00007133 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
philippee0829e02015-04-21 20:55:40 +00007134 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
philippef54cb662015-05-10 22:19:31 +00007135 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
philippecabdbb52015-04-20 21:33:16 +00007136 do_RCEC_GC();
philippe158404e2015-04-10 19:34:14 +00007137
philippef54cb662015-05-10 22:19:31 +00007138 /* If there are still no entries available (all the table entries are full),
 7139      and we hit the threshold point, then do a GC */
7140 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
7141 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
7142 if (UNLIKELY (vts_tab_GC))
7143 vts_tab__do_GC( False/*don't show stats*/ );
7144
7145 /* scan GC the SecMaps when
7146 (1) no SecMap in the freelist
7147 and (2) the current nr of live secmaps exceeds the threshold. */
7148 if (UNLIKELY(SecMap_freelist == NULL
7149 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
7150 // If we did a vts tab GC, then no need to flush the cache again.
7151 if (!vts_tab_GC)
7152 zsm_flush_cache();
7153 shmem__SecMap_do_GC(True);
7154 }
philippecabdbb52015-04-20 21:33:16 +00007155
7156 /* Check the reference counts (expensive) */
7157 if (CHECK_CEM)
7158 event_map__check_reference_counts();
sewardjf98e1c02008-10-25 16:22:41 +00007159}
7160
7161
7162/////////////////////////////////////////////////////////////////
7163/////////////////////////////////////////////////////////////////
7164// //
7165// SECTION END main library //
7166// //
7167/////////////////////////////////////////////////////////////////
7168/////////////////////////////////////////////////////////////////
7169
7170/*--------------------------------------------------------------------*/
7171/*--- end libhb_main.c ---*/
7172/*--------------------------------------------------------------------*/