
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)


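/* An illustrative aside on the two definitions above: a "constraint"
   SVal must carry two VtsIDs (a read-clock and a write-clock) plus a
   tag in the top two bits that distinguishes it from SVal_INVALID
   (tag 11) and SVal_NOACCESS (tag 10); this is why VtsIDs are limited
   to 30 bits.  A minimal sketch of such a packing -- purely
   illustrative; the encoding actually used is defined by SVal__mkC
   and friends, further down this file: */
#if 0
static inline SVal example_SVal_mkC ( VtsID rmini, VtsID wmini ) {
   /* 2-bit tag in bits 63..62, rmini in bits 59..30, wmini in 29..0 */
   return (1ULL << 62) | ((ULong)rmini << 30) | (ULong)wmini;
}
#endif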
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)


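/* A consequence of the above packing which can be checked at compile
   time: a ScalarTS is a single 64-bit word, so a VTS over N threads
   costs just 8*N bytes.  Hypothetical check, not part of the library
   proper: */
#if 0
static void example_ScalarTS_check ( void ) {
   ScalarTS ts;
   ts.thrid = 1024;  /* first valid ThrID; see NB2 above */
   ts.tym   = 1;     /* one synchronisation tick */
   tl_assert(sizeof(ScalarTS) == 8);
   tl_assert(ts.thrid <= ThrID_MAX_VALID);
}
#endif
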
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;


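/* For illustration only: given an address 'a', the R/W bit pair the
   filter keeps for that byte could be extracted as below.  The helper
   name is hypothetical; the real filter accessors appear later in
   this file. */
#if 0
static inline UWord example_filter_RW_bits ( Filter* fi, Addr a ) {
   UWord   lineno = FI_GET_LINENO(a);
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - FI_GET_TAG(a)) / 8; /* which UShort in the line */
   UWord   byteno = a & 7;                   /* which bit pair within it */
   return (line->u16s[loff] >> (2 * byteno)) & 3; /* bit 1 = R, bit 0 = W */
}
#endif
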
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );

/* A doubly linked list of all the SO's. */
SO* admin_SO;


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   either above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items with a
   zero reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range       ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range      ( Addr, Addr, SizeT );
static void zsm_flush_cache      ( void );

#endif /* ! __HB_ZSM_H */

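/* A sketch of the reference-counting protocol described above, seen
   from a hypothetical caller (this is not code in the library): */
#if 0
static void example_exact_rc_gc ( void ) {
   /* Cached reference counts may be wrong in either direction ... */
   zsm_flush_cache();
   /* ... but immediately after the flush they are exact, so any SVal
      whose observed rcinc/rcdec balance is zero really is
      unreferenced by this library and may be reclaimed. */
}
#endif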

/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

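/* For example, a tree holding eight independent 8-bit values has
   descriptor TREE_DESCR_8_7 | ... | TREE_DESCR_8_0, whereas a tree
   holding a single 64-bit value in tree[0] has just TREE_DESCR_64
   set.  See normalise_tree below for how descriptors are derived. */
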
typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

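/* Example of the Z/F split: a line whose 64 SVals are all
   SVal_NOACCESS compresses to dict = { SVal_NOACCESS, SVal_INVALID,
   SVal_INVALID, SVal_INVALID } with every 2-bit index zero (this is
   exactly how shmem__alloc_or_recycle_SecMap below initialises its
   lines).  A line needing more than 4 distinct SVals cannot be
   compressed and falls back to the LineF form. */
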
/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC      0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
static UWord stats__vts_pruning          = 0; // # nr of vts pruning

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become entirely SVal_NOACCESS are inserted in a
   list of recycled SecMaps.  When a new SecMap is needed, a recycled
   SecMap will be used in preference to allocating a new SecMap. */
/* We make a linked list of SecMaps.  The linesF pointer is re-used to
   implement the linked list. */
static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord n = 0;

   sm = SecMap_freelist;
   while (sm) {
     n++;
     sm = (SecMap*)sm->linesF;
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   sm->linesF = (LineF*)SecMap_freelist;
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
     tl_assert (sm->magic == SecMap_free_MAGIC);
     SecMap_freelist = (SecMap*)sm->linesF;
     if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many could be GC-ed.
   If 'really' is True, actually GC them. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         ok_to_GC = lineZ->dict[0] == SVal_INVALID
           || (lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]));
      }
      /* Deal with the LineFs */
      for (i = 0; i < sm->linesF_size && ok_to_GC; i++) {
         LineF* lineF = &sm->linesF[i];
         if (!lineF->inUse)
            continue;
         for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
            ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
        SecMap *fm_sm;
        Addr fm_gaKey;
        /* We cannot remove a SecMap from map_shmem while iterating.
           So, stop iteration, remove from map_shmem, recreate the iteration
           on the next SecMap. */
        VG_(doneIterFM) ( map_shmem );
        /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS or
           not in use.  We just need to free the linesF. */
        if (sm->linesF_size > 0) {
          HG_(free)(sm->linesF);
          stats__secmap_linesF_allocd -= sm->linesF_size;
          stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
        }
        if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
          tl_assert (0);
        stats__secmaps_in_map_shmem--;
        tl_assert (gaKey == fm_gaKey);
        tl_assert (sm == fm_sm);
        stats__secmaps_scanGCed++;
        push_SecMap_on_freelist (sm);
        VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  In that case, we allow 10%
         more allocation before the next GC.  This avoids doing a lot
         of costly GCs for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
            perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         which allocates around 30GB of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

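/* Usage note, inferred from the function's two modes: calling
   shmem__SecMap_do_GC(False) is a dry run that merely counts GC-able
   SecMaps, while shmem__SecMap_do_GC(True) actually detaches them
   from map_shmem and pushes them onto the freelist above. */
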
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
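
/* Worked example: for ix == 5, bix == 1 and shft == 2, so the 2-bit
   pair lives in bits 3..2 of arr[1]; writing b2 there and reading it
   back returns b2, leaving the other three pairs in arr[1] intact. */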

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, we rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

1102{
1103 /* a.k.a Party Time for gcc's constant folder */
1104# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
1105 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
1106 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
1107 ( (b8_5) << 12) | ( (b8_4) << 11) | \
1108 ( (b8_3) << 10) | ( (b8_2) << 9) | \
1109 ( (b8_1) << 8) | ( (b8_0) << 7) | \
1110 ( (b16_3) << 6) | ( (b32_1) << 5) | \
1111 ( (b16_2) << 4) | ( (b64) << 3) | \
1112 ( (b16_1) << 2) | ( (b32_0) << 1) | \
1113 ( (b16_0) << 0) ) )
1114
1115# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
1116 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
1117 ( (bit5) << 5) | ( (bit4) << 4) | \
1118 ( (bit3) << 3) | ( (bit2) << 2) | \
1119 ( (bit1) << 1) | ( (bit0) << 0) ) )
1120
1121 /* these should all get folded out at compile time */
1122 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
1123 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
1124 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
1125 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
1126 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
1127 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
1128 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
1129 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
1130 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
1131
1132 switch (descr) {
1133 /*
1134 +--------------------------------- TREE_DESCR_8_7
1135 | +------------------- TREE_DESCR_8_0
1136 | | +---------------- TREE_DESCR_16_3
1137 | | | +-------------- TREE_DESCR_32_1
1138 | | | | +------------ TREE_DESCR_16_2
1139 | | | | | +--------- TREE_DESCR_64
1140 | | | | | | +------ TREE_DESCR_16_1
1141 | | | | | | | +---- TREE_DESCR_32_0
1142 | | | | | | | | +-- TREE_DESCR_16_0
1143 | | | | | | | | |
1144 | | | | | | | | | GRANULARITY, 7 -> 0 */
1145 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
1146 return BYTE(1,1,1,1,1,1,1,1);
1147 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
1148 return BYTE(1,1,0,1,1,1,1,1);
1149 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
1150 return BYTE(0,1,1,1,1,1,1,1);
1151 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
1152 return BYTE(0,1,0,1,1,1,1,1);
1153
1154 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
1155 return BYTE(1,1,1,1,1,1,0,1);
1156 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
1157 return BYTE(1,1,0,1,1,1,0,1);
1158 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
1159 return BYTE(0,1,1,1,1,1,0,1);
1160 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
1161 return BYTE(0,1,0,1,1,1,0,1);
1162
1163 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
1164 return BYTE(1,1,1,1,0,1,1,1);
1165 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
1166 return BYTE(1,1,0,1,0,1,1,1);
1167 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
1168 return BYTE(0,1,1,1,0,1,1,1);
1169 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
1170 return BYTE(0,1,0,1,0,1,1,1);
1171
1172 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
1173 return BYTE(1,1,1,1,0,1,0,1);
1174 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
1175 return BYTE(1,1,0,1,0,1,0,1);
1176 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
1177 return BYTE(0,1,1,1,0,1,0,1);
1178 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
1179 return BYTE(0,1,0,1,0,1,0,1);
1180
1181 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
1182 return BYTE(0,0,0,1,1,1,1,1);
1183 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
1184 return BYTE(0,0,0,1,1,1,0,1);
1185 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
1186 return BYTE(0,0,0,1,0,1,1,1);
1187 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
1188 return BYTE(0,0,0,1,0,1,0,1);
1189
1190 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
1191 return BYTE(1,1,1,1,0,0,0,1);
1192 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
1193 return BYTE(1,1,0,1,0,0,0,1);
1194 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
1195 return BYTE(0,1,1,1,0,0,0,1);
1196 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
1197 return BYTE(0,1,0,1,0,0,0,1);
1198
1199 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1200 return BYTE(0,0,0,1,0,0,0,1);
1201
1202 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1203 return BYTE(0,0,0,0,0,0,0,1);
1204
1205 default: return BYTE(0,0,0,0,0,0,0,0);
1206 /* INVALID - any valid descr produces at least one
1207 valid bit in tree[0..7]*/
1208 }
1209 /* NOTREACHED*/
1210 tl_assert(0);
1211
1212# undef DESCR
1213# undef BYTE
1214}
1215
__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
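
/* Example: if all eight leaves hold the same SVal, the pairwise
   merges above cascade and the tree normalises to a single 64-bit
   node, i.e. descr == TREE_DESCR_64 with tree[1..7] == SVal_INVALID;
   eight distinct leaves leave the tree fully split, with all eight
   TREE_DESCR_8_x bits set. */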

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


1377
1378static
1379void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1380 /*OUT*/Word* dstUsedP,
1381 Word nDst, CacheLine* src )
1382{
1383 Word tno, cloff, dstUsed;
1384
1385 tl_assert(nDst == N_LINE_ARANGE);
1386 dstUsed = 0;
1387
1388 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1389 UShort descr = src->descrs[tno];
1390 SVal* tree = &src->svals[cloff];
1391
1392 /* sequentialise the tree described by (descr,tree). */
1393# define PUT(_n,_v) \
1394 do { dst[dstUsed ].count = (_n); \
1395 dst[dstUsed++].sval = (_v); \
1396 } while (0)
1397
1398 /* byte 0 */
1399 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1400 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1401 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1402 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1403 /* byte 1 */
1404 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1405 /* byte 2 */
1406 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1407 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1408 /* byte 3 */
1409 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1410 /* byte 4 */
1411 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1412 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1413 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1414 /* byte 5 */
1415 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1416 /* byte 6 */
1417 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1418 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1419 /* byte 7 */
1420 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1421
1422# undef PUT
1423 /* END sequentialise the tree described by (descr,tree). */
1424
1425 }
1426 tl_assert(cloff == N_LINE_ARANGE);
1427 tl_assert(dstUsed <= nDst);
1428
1429 *dstUsedP = dstUsed;
1430}
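
/* E.g. a CacheLine whose 64 bytes all hold one SVal sequentialises to
   eight (count==8, sval) entries -- one per tree -- which the
   writeback code below can then pack into a single-entry LineZ
   dictionary. */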
1431
1432/* Write the cacheline 'wix' to backing store. Where it ends up
1433 is determined by its tag field. */
1434static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1435{
1436 Word i, j, k, m;
1437 Addr tag;
1438 SecMap* sm;
1439 CacheLine* cl;
1440 LineZ* lineZ;
1441 LineF* lineF;
1442 Word zix, fix, csvalsUsed;
1443 CountedSVal csvals[N_LINE_ARANGE];
1444 SVal sv;
1445
1446 if (0)
1447 VG_(printf)("scache wback line %d\n", (Int)wix);
1448
1449 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1450
1451 tag = cache_shmem.tags0[wix];
1452 cl = &cache_shmem.lyns0[wix];
1453
1454 /* The cache line may have been invalidated; if so, ignore it. */
1455 if (!is_valid_scache_tag(tag))
1456 return;
1457
1458 /* Where are we going to put it? */
1459 sm = NULL;
1460 lineZ = NULL;
1461 lineF = NULL;
1462 zix = fix = -1;
1463
1464 /* find the Z line to write in and rcdec it or the associated F
1465 line. */
1466 find_Z_for_writing( &sm, &zix, tag );
1467
1468 tl_assert(sm);
1469 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1470 lineZ = &sm->linesZ[zix];
1471
1472 /* Generate the data to be stored */
sewardj8f5374e2008-12-07 11:40:17 +00001473 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001474 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1475
1476 csvalsUsed = -1;
1477 sequentialise_CacheLine( csvals, &csvalsUsed,
1478 N_LINE_ARANGE, cl );
1479 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1480 if (0) VG_(printf)("%lu ", csvalsUsed);
1481
1482 lineZ->dict[0] = lineZ->dict[1]
1483 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1484
1485 /* i indexes actual shadow values, k is cursor in csvals */
1486 i = 0;
1487 for (k = 0; k < csvalsUsed; k++) {
1488
1489 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001490 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001491 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1492 /* do we already have it? */
1493 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1494 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1495 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1496 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1497 /* no. look for a free slot. */
sewardj8f5374e2008-12-07 11:40:17 +00001498 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001499 tl_assert(sv != SVal_INVALID);
1500 if (lineZ->dict[0]
1501 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1502 if (lineZ->dict[1]
1503 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1504 if (lineZ->dict[2]
1505 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1506 if (lineZ->dict[3]
1507 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1508 break; /* we'll have to use the f rep */
1509 dict_ok:
1510 m = csvals[k].count;
1511 if (m == 8) {
1512 write_twobit_array( lineZ->ix2s, i+0, j );
1513 write_twobit_array( lineZ->ix2s, i+1, j );
1514 write_twobit_array( lineZ->ix2s, i+2, j );
1515 write_twobit_array( lineZ->ix2s, i+3, j );
1516 write_twobit_array( lineZ->ix2s, i+4, j );
1517 write_twobit_array( lineZ->ix2s, i+5, j );
1518 write_twobit_array( lineZ->ix2s, i+6, j );
1519 write_twobit_array( lineZ->ix2s, i+7, j );
1520 i += 8;
1521 }
1522 else if (m == 4) {
1523 write_twobit_array( lineZ->ix2s, i+0, j );
1524 write_twobit_array( lineZ->ix2s, i+1, j );
1525 write_twobit_array( lineZ->ix2s, i+2, j );
1526 write_twobit_array( lineZ->ix2s, i+3, j );
1527 i += 4;
1528 }
1529 else if (m == 1) {
1530 write_twobit_array( lineZ->ix2s, i+0, j );
1531 i += 1;
1532 }
1533 else if (m == 2) {
1534 write_twobit_array( lineZ->ix2s, i+0, j );
1535 write_twobit_array( lineZ->ix2s, i+1, j );
1536 i += 2;
1537 }
1538 else {
1539 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1540 }
1541
1542 }
1543
1544 if (LIKELY(i == N_LINE_ARANGE)) {
1545 /* Construction of the compressed representation was
1546 successful. */
1547 rcinc_LineZ(lineZ);
1548 stats__cache_Z_wbacks++;
1549 } else {
1550 /* Cannot use the compressed(z) representation. Use the full(f)
1551 rep instead. */
1552 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1553 alloc_F_for_writing( sm, &fix );
1554 tl_assert(sm->linesF);
1555 tl_assert(sm->linesF_size > 0);
1556 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1557 lineF = &sm->linesF[fix];
1558 tl_assert(!lineF->inUse);
1559 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1560 lineZ->dict[1] = (SVal)fix;
1561 lineF->inUse = True;
1562 i = 0;
1563 for (k = 0; k < csvalsUsed; k++) {
sewardj8f5374e2008-12-07 11:40:17 +00001564 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001565 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1566 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001567 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001568 tl_assert(sv != SVal_INVALID);
1569 for (m = csvals[k].count; m > 0; m--) {
1570 lineF->w64s[i] = sv;
1571 i++;
1572 }
1573 }
1574 tl_assert(i == N_LINE_ARANGE);
1575 rcinc_LineF(lineF);
1576 stats__cache_F_wbacks++;
1577 }
sewardjf98e1c02008-10-25 16:22:41 +00001578}
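/* For illustration only: a minimal sketch (never compiled) of the
   dictionary-plus-2-bit-index scheme that the Z rep above relies on.
   toy_zrep_compress is a hypothetical name, not part of libhb.  It
   packs 8 values holding at most 4 distinct SVals into a 4-entry
   dict plus 2-bit indices, and reports failure -- meaning the F rep
   must be used -- otherwise.  Like the real code, it assumes no
   input value is SVal_INVALID. */
#if 0
static Bool toy_zrep_compress ( /*OUT*/SVal dict[4], /*OUT*/UChar ix2s[2],
                                const SVal vals[8] )
{
   Word i, j, nDict = 0;
   for (i = 0; i < 4; i++) dict[i] = SVal_INVALID;
   ix2s[0] = ix2s[1] = 0;
   for (i = 0; i < 8; i++) {
      for (j = 0; j < nDict; j++)
         if (dict[j] == vals[i]) break;
      if (j == nDict) {
         if (nDict == 4) return False; /* a 5th distinct value: give up */
         dict[nDict++] = vals[i];
      }
      /* deposit index j in the i'th 2-bit slot */
      ix2s[i >> 2] |= (UChar)(j << (2 * (i & 3)));
   }
   return True;
}
#endif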
1579
1580/* Fetch the cacheline 'wix' from the backing store. The tag
1581 associated with 'wix' is assumed to have already been filled in;
1582 hence that is used to determine where in the backing store to read
1583 from. */
1584static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1585{
1586 Word i;
1587 Addr tag;
1588 CacheLine* cl;
1589 LineZ* lineZ;
1590 LineF* lineF;
1591
1592 if (0)
1593 VG_(printf)("scache fetch line %d\n", (Int)wix);
1594
1595 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1596
1597 tag = cache_shmem.tags0[wix];
1598 cl = &cache_shmem.lyns0[wix];
1599
1600 /* reject nonsense requests */
1601 tl_assert(is_valid_scache_tag(tag));
1602
1603 lineZ = NULL;
1604 lineF = NULL;
1605 find_ZF_for_reading( &lineZ, &lineF, tag );
1606 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1607
1608 /* expand the data into the bottom layer of the tree, then get
1609      normalise_CacheLine to build the descriptor array. */
1610 if (lineF) {
1611 tl_assert(lineF->inUse);
1612 for (i = 0; i < N_LINE_ARANGE; i++) {
1613 cl->svals[i] = lineF->w64s[i];
1614 }
1615 stats__cache_F_fetches++;
1616 } else {
1617 for (i = 0; i < N_LINE_ARANGE; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00001618 UWord ix = read_twobit_array( lineZ->ix2s, i );
philippe1475a7f2015-05-11 19:45:08 +00001619 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1620 cl->svals[i] = lineZ->dict[ix];
1621 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00001622 }
1623 stats__cache_Z_fetches++;
1624 }
1625 normalise_CacheLine( cl );
1626}
1627
philippe8939e092015-05-11 20:18:10 +00001628/* Invalidate the cachelines corresponding to the given range, which
1629 must start and end on a cacheline boundary. */
philippef54cb662015-05-10 22:19:31 +00001630static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1631{
philippef54cb662015-05-10 22:19:31 +00001632 Word wix;
1633
philippe8939e092015-05-11 20:18:10 +00001634 /* ga must be on a cacheline boundary. */
1635 tl_assert (is_valid_scache_tag (ga));
1636 /* szB must be a multiple of cacheline size. */
1637 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1638
1639
philippef54cb662015-05-10 22:19:31 +00001640 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1641 Word nwix = szB / N_LINE_ARANGE;
1642
1643 if (nwix > N_WAY_NENT)
1644      nwix = N_WAY_NENT; // no need to check the same entry several times.
1645
1646 for (wix = 0; wix < nwix; wix++) {
1647 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1648 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1649 ga_ix++;
philippe364f0bb2015-05-15 09:38:54 +00001650 if (UNLIKELY(ga_ix == N_WAY_NENT))
philippef54cb662015-05-10 22:19:31 +00001651 ga_ix = 0;
1652 }
sewardjf98e1c02008-10-25 16:22:41 +00001653}
1654
philippef54cb662015-05-10 22:19:31 +00001655
sewardjf98e1c02008-10-25 16:22:41 +00001656static void shmem__flush_and_invalidate_scache ( void ) {
1657 Word wix;
1658 Addr tag;
1659 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1660 tl_assert(!is_valid_scache_tag(1));
1661 for (wix = 0; wix < N_WAY_NENT; wix++) {
1662 tag = cache_shmem.tags0[wix];
1663 if (tag == 1/*INVALID*/) {
1664 /* already invalid; nothing to do */
1665 } else {
1666 tl_assert(is_valid_scache_tag(tag));
1667 cacheline_wback( wix );
1668 }
1669 cache_shmem.tags0[wix] = 1/*INVALID*/;
1670 }
philippef54cb662015-05-10 22:19:31 +00001671 stats__cache_flushes_invals++;
sewardjf98e1c02008-10-25 16:22:41 +00001672}
1673
1674
1675static inline Bool aligned16 ( Addr a ) {
1676 return 0 == (a & 1);
1677}
1678static inline Bool aligned32 ( Addr a ) {
1679 return 0 == (a & 3);
1680}
1681static inline Bool aligned64 ( Addr a ) {
1682 return 0 == (a & 7);
1683}
1684static inline UWord get_cacheline_offset ( Addr a ) {
1685 return (UWord)(a & (N_LINE_ARANGE - 1));
1686}
1687static inline Addr cacheline_ROUNDUP ( Addr a ) {
1688 return ROUNDUP(a, N_LINE_ARANGE);
1689}
1690static inline Addr cacheline_ROUNDDN ( Addr a ) {
1691 return ROUNDDN(a, N_LINE_ARANGE);
1692}
1693static inline UWord get_treeno ( Addr a ) {
1694 return get_cacheline_offset(a) >> 3;
1695}
1696static inline UWord get_tree_offset ( Addr a ) {
1697 return a & 7;
1698}
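/* A worked example of the three decompositions above, assuming the
   usual configuration in which N_LINE_ARANGE == 64 (8 trees of 8
   bytes per line).  toy_offset_demo is hypothetical and never
   compiled. */
#if 0
static void toy_offset_demo ( void )
{
   Addr a = 0x5000 + 43;                /* 43 bytes into a 64-byte line */
   tl_assert(get_cacheline_offset(a) == 43);
   tl_assert(get_treeno(a) == 5);       /* 43 >> 3 */
   tl_assert(get_tree_offset(a) == 3);  /* 43 & 7 */
}
#endif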
1699
1700static __attribute__((noinline))
1701 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1702static inline CacheLine* get_cacheline ( Addr a )
1703{
1704 /* tag is 'a' with the in-line offset masked out,
1705 eg a[31]..a[4] 0000 */
1706 Addr tag = a & ~(N_LINE_ARANGE - 1);
1707 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1708 stats__cache_totrefs++;
1709 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1710 return &cache_shmem.lyns0[wix];
1711 } else {
1712 return get_cacheline_MISS( a );
1713 }
1714}
1715
1716static __attribute__((noinline))
1717 CacheLine* get_cacheline_MISS ( Addr a )
1718{
1719 /* tag is 'a' with the in-line offset masked out,
1720 eg a[31]..a[4] 0000 */
1721
1722 CacheLine* cl;
1723 Addr* tag_old_p;
1724 Addr tag = a & ~(N_LINE_ARANGE - 1);
1725 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1726
1727 tl_assert(tag != cache_shmem.tags0[wix]);
1728
1729 /* Dump the old line into the backing store. */
1730 stats__cache_totmisses++;
1731
1732 cl = &cache_shmem.lyns0[wix];
1733 tag_old_p = &cache_shmem.tags0[wix];
1734
1735 if (is_valid_scache_tag( *tag_old_p )) {
1736 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001737 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001738 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1739 cacheline_wback( wix );
1740 }
1741 /* and reload the new one */
1742 *tag_old_p = tag;
1743 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001744 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001745 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1746 return cl;
1747}
1748
1749static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1750 stats__cline_64to32pulldown++;
1751 switch (toff) {
1752 case 0: case 4:
1753 tl_assert(descr & TREE_DESCR_64);
1754 tree[4] = tree[0];
1755 descr &= ~TREE_DESCR_64;
1756 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1757 break;
1758 default:
1759 tl_assert(0);
1760 }
1761 return descr;
1762}
1763
1764static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1765 stats__cline_32to16pulldown++;
1766 switch (toff) {
1767 case 0: case 2:
1768 if (!(descr & TREE_DESCR_32_0)) {
1769 descr = pulldown_to_32(tree, 0, descr);
1770 }
1771 tl_assert(descr & TREE_DESCR_32_0);
1772 tree[2] = tree[0];
1773 descr &= ~TREE_DESCR_32_0;
1774 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1775 break;
1776 case 4: case 6:
1777 if (!(descr & TREE_DESCR_32_1)) {
1778 descr = pulldown_to_32(tree, 4, descr);
1779 }
1780 tl_assert(descr & TREE_DESCR_32_1);
1781 tree[6] = tree[4];
1782 descr &= ~TREE_DESCR_32_1;
1783 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1784 break;
1785 default:
1786 tl_assert(0);
1787 }
1788 return descr;
1789}
1790
1791static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1792 stats__cline_16to8pulldown++;
1793 switch (toff) {
1794 case 0: case 1:
1795 if (!(descr & TREE_DESCR_16_0)) {
1796 descr = pulldown_to_16(tree, 0, descr);
1797 }
1798 tl_assert(descr & TREE_DESCR_16_0);
1799 tree[1] = tree[0];
1800 descr &= ~TREE_DESCR_16_0;
1801 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1802 break;
1803 case 2: case 3:
1804 if (!(descr & TREE_DESCR_16_1)) {
1805 descr = pulldown_to_16(tree, 2, descr);
1806 }
1807 tl_assert(descr & TREE_DESCR_16_1);
1808 tree[3] = tree[2];
1809 descr &= ~TREE_DESCR_16_1;
1810 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1811 break;
1812 case 4: case 5:
1813 if (!(descr & TREE_DESCR_16_2)) {
1814 descr = pulldown_to_16(tree, 4, descr);
1815 }
1816 tl_assert(descr & TREE_DESCR_16_2);
1817 tree[5] = tree[4];
1818 descr &= ~TREE_DESCR_16_2;
1819 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1820 break;
1821 case 6: case 7:
1822 if (!(descr & TREE_DESCR_16_3)) {
1823 descr = pulldown_to_16(tree, 6, descr);
1824 }
1825 tl_assert(descr & TREE_DESCR_16_3);
1826 tree[7] = tree[6];
1827 descr &= ~TREE_DESCR_16_3;
1828 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1829 break;
1830 default:
1831 tl_assert(0);
1832 }
1833 return descr;
1834}
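/* A worked example of the pulldown chain, derived from the code
   above.  Start with descr == TREE_DESCR_64 (the whole tree valid as
   a single 64-bit value) and ask for byte 3:

      pulldown_to_8(tree, 3, descr)
         -> needs TREE_DESCR_16_1, so calls pulldown_to_16(tree, 2, ..)
            -> needs TREE_DESCR_32_0, so calls pulldown_to_32(tree, 0, ..)
               -> splits 64 into 32_1|32_0, copying tree[0] to tree[4]
            -> splits 32_0 into 16_1|16_0, copying tree[0] to tree[2]
         -> splits 16_1 into 8_3|8_2, copying tree[2] to tree[3]

   leaving descr == 32_1 | 16_0 | 8_3 | 8_2: the minimal split that
   makes tree[3] individually addressable. */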
1835
1836
1837static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1838 UShort mask;
1839 switch (toff) {
1840 case 0:
1841 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1842 tl_assert( (descr & mask) == mask );
1843 descr &= ~mask;
1844 descr |= TREE_DESCR_16_0;
1845 break;
1846 case 2:
1847 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1848 tl_assert( (descr & mask) == mask );
1849 descr &= ~mask;
1850 descr |= TREE_DESCR_16_1;
1851 break;
1852 case 4:
1853 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1854 tl_assert( (descr & mask) == mask );
1855 descr &= ~mask;
1856 descr |= TREE_DESCR_16_2;
1857 break;
1858 case 6:
1859 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1860 tl_assert( (descr & mask) == mask );
1861 descr &= ~mask;
1862 descr |= TREE_DESCR_16_3;
1863 break;
1864 default:
1865 tl_assert(0);
1866 }
1867 return descr;
1868}
1869
1870static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1871 UShort mask;
1872 switch (toff) {
1873 case 0:
1874 if (!(descr & TREE_DESCR_16_0))
1875 descr = pullup_descr_to_16(descr, 0);
1876 if (!(descr & TREE_DESCR_16_1))
1877 descr = pullup_descr_to_16(descr, 2);
1878 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1879 tl_assert( (descr & mask) == mask );
1880 descr &= ~mask;
1881 descr |= TREE_DESCR_32_0;
1882 break;
1883 case 4:
1884 if (!(descr & TREE_DESCR_16_2))
1885 descr = pullup_descr_to_16(descr, 4);
1886 if (!(descr & TREE_DESCR_16_3))
1887 descr = pullup_descr_to_16(descr, 6);
1888 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1889 tl_assert( (descr & mask) == mask );
1890 descr &= ~mask;
1891 descr |= TREE_DESCR_32_1;
1892 break;
1893 default:
1894 tl_assert(0);
1895 }
1896 return descr;
1897}
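/* The inverse worked example: starting with all eight TREE_DESCR_8_x
   bits set, pullup_descr_to_32(descr, 0) first merges 8_1|8_0 into
   16_0 and 8_3|8_2 into 16_1, then merges 16_1|16_0 into 32_0,
   leaving descr == 8_7 | 8_6 | 8_5 | 8_4 | 32_0. */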
1898
1899static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1900 switch (toff) {
1901 case 0: case 4:
1902 return 0 != (descr & TREE_DESCR_64);
1903 default:
1904 tl_assert(0);
1905 }
1906}
1907
1908static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1909 switch (toff) {
1910 case 0:
1911 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1912 case 2:
1913 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1914 case 4:
1915 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1916 case 6:
1917 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1918 default:
1919 tl_assert(0);
1920 }
1921}
1922
1923/* ------------ Cache management ------------ */
1924
1925static void zsm_flush_cache ( void )
1926{
1927 shmem__flush_and_invalidate_scache();
1928}
1929
1930
philippe1475a7f2015-05-11 19:45:08 +00001931static void zsm_init ( void )
sewardjf98e1c02008-10-25 16:22:41 +00001932{
1933 tl_assert( sizeof(UWord) == sizeof(Addr) );
1934
sewardjf98e1c02008-10-25 16:22:41 +00001935 tl_assert(map_shmem == NULL);
1936 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1937 HG_(free),
1938 NULL/*unboxed UWord cmp*/);
philippef54cb662015-05-10 22:19:31 +00001939 /* Invalidate all cache entries. */
1940 tl_assert(!is_valid_scache_tag(1));
1941 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1942 cache_shmem.tags0[wix] = 1/*INVALID*/;
1943 }
sewardjf98e1c02008-10-25 16:22:41 +00001944
1945 /* a SecMap must contain an integral number of CacheLines */
1946 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1947 /* also ... a CacheLine holds an integral number of trees */
1948 tl_assert(0 == (N_LINE_ARANGE % 8));
1949}
1950
1951/////////////////////////////////////////////////////////////////
1952/////////////////////////////////////////////////////////////////
1953// //
1954// SECTION END compressed shadow memory //
1955// //
1956/////////////////////////////////////////////////////////////////
1957/////////////////////////////////////////////////////////////////
1958
1959
1960
1961/////////////////////////////////////////////////////////////////
1962/////////////////////////////////////////////////////////////////
1963// //
1964// SECTION BEGIN vts primitives //
1965// //
1966/////////////////////////////////////////////////////////////////
1967/////////////////////////////////////////////////////////////////
1968
sewardjf98e1c02008-10-25 16:22:41 +00001969
sewardje4cce742011-02-24 15:25:24 +00001970/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1971 being compact stand-ins for Thr*'s. Use these functions to map
1972 between them. */
1973static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1974static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1975
sewardje4cce742011-02-24 15:25:24 +00001976__attribute__((noreturn))
1977static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1978{
1979 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00001980 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001981 "\n"
1982 "Helgrind: cannot continue, run aborted: too many threads.\n"
1983 "Sorry. Helgrind can only handle programs that create\n"
1984 "%'llu or fewer threads over their entire lifetime.\n"
1985 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001986 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001987 } else {
florian6bf37262012-10-21 03:23:36 +00001988 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001989 "\n"
1990 "Helgrind: cannot continue, run aborted: too many\n"
1991 "synchronisation events. Sorry. Helgrind can only handle\n"
1992 "programs which perform %'llu or fewer\n"
1993 "inter-thread synchronisation events (locks, unlocks, etc).\n"
1994 "\n";
1995 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1996 }
1997 VG_(exit)(1);
1998 /*NOTREACHED*/
1999 tl_assert(0); /*wtf?!*/
2000}
2001
2002
philippec3508652015-03-28 12:01:58 +00002003/* The dead thread (ThrID, actually) tables. A thread may only be
sewardjffce8152011-06-24 10:09:41 +00002004 listed here if we have been notified thereof by libhb_async_exit.
2005 New entries are added at the end. The order isn't important, but
philippec3508652015-03-28 12:01:58 +00002006 the ThrID values must be unique.
2007 verydead_thread_table_not_pruned lists the identity of the threads
2008 that died since the previous round of pruning.
2009 Once pruning is done, these ThrID are added in verydead_thread_table.
2010 We don't actually need to keep the set of threads that have ever died --
sewardjffce8152011-06-24 10:09:41 +00002011 only the threads that have died since the previous round of
2012 pruning. But it's useful for sanity check purposes to keep the
2013 entire set, so we do. */
philippec3508652015-03-28 12:01:58 +00002014static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
sewardjffce8152011-06-24 10:09:41 +00002015static XArray* /* of ThrID */ verydead_thread_table = NULL;
2016
2017/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00002018static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2019 ThrID id1 = *(const ThrID*)v1;
2020 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00002021 if (id1 < id2) return -1;
2022 if (id1 > id2) return 1;
2023 return 0;
2024}
2025
philippec3508652015-03-28 12:01:58 +00002026static void verydead_thread_tables_init ( void )
sewardjffce8152011-06-24 10:09:41 +00002027{
2028 tl_assert(!verydead_thread_table);
philippec3508652015-03-28 12:01:58 +00002029 tl_assert(!verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002030 verydead_thread_table
2031 = VG_(newXA)( HG_(zalloc),
2032 "libhb.verydead_thread_table_init.1",
2033 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00002034 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
philippec3508652015-03-28 12:01:58 +00002035 verydead_thread_table_not_pruned
2036 = VG_(newXA)( HG_(zalloc),
2037 "libhb.verydead_thread_table_init.2",
2038 HG_(free), sizeof(ThrID) );
2039 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
sewardjffce8152011-06-24 10:09:41 +00002040}
2041
philippec3508652015-03-28 12:01:58 +00002042static void verydead_thread_table_sort_and_check (XArray* thrids)
2043{
2044 UWord i;
2045
2046 VG_(sortXA)( thrids );
2047   /* Sanity check: check for unique ThrID values. */
2048 UWord nBT = VG_(sizeXA)( thrids );
2049 if (nBT > 0) {
2050 ThrID thrid1, thrid2;
2051 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2052 for (i = 1; i < nBT; i++) {
2053 thrid1 = thrid2;
2054 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2055 tl_assert(thrid1 < thrid2);
2056 }
2057 }
2058   /* Ok, so the dead thread table 'thrids' has unique and in-order keys. */
2059}
sewardjf98e1c02008-10-25 16:22:41 +00002060
2061/* A VTS contains .ts, its vector clock, and also .id, a field to hold
2062 a backlink for the caller's convenience. Since we have no idea
2063 what to set that to in the library, it always gets set to
2064 VtsID_INVALID. */
2065typedef
2066 struct {
sewardj7aa38a92011-02-27 23:04:12 +00002067 VtsID id;
2068 UInt usedTS;
2069 UInt sizeTS;
2070 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00002071 }
2072 VTS;
2073
sewardj7aa38a92011-02-27 23:04:12 +00002074/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00002075static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00002076
sewardjffce8152011-06-24 10:09:41 +00002077/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00002078 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00002079static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002080
sewardjffce8152011-06-24 10:09:41 +00002081/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2082 array is sized exactly to hold the number of required elements.
2083 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2084 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00002085static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00002086
sewardjf98e1c02008-10-25 16:22:41 +00002087/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00002088static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002089
sewardj7aa38a92011-02-27 23:04:12 +00002090/* Create a new singleton VTS in 'out'. Caller must have
2091 pre-allocated 'out' sufficiently big to hold the result in all
2092 possible cases. */
2093static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00002094
sewardj7aa38a92011-02-27 23:04:12 +00002095/* Create in 'out' a VTS which is the same as 'vts' except with
2096 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2097 sufficiently big to hold the result in all possible cases. */
2098static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002099
sewardj7aa38a92011-02-27 23:04:12 +00002100/* Create in 'out' a VTS which is the join (max) of 'a' and
2101 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2102 the result in all possible cases. */
2103static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002104
sewardj23f12002009-07-24 08:45:08 +00002105/* Compute the partial ordering relation of the two args. Although we
2106 could be completely general and return an enumeration value (EQ,
2107 LT, GT, UN), in fact we only need LEQ, and so we may as well
2108 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00002109
sewardje4cce742011-02-24 15:25:24 +00002110 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2111   invalid ThrID).  In the latter case, the returned ThrID indicates
2112   the discovered point at which LEQ fails.  There may be more
2113 than one such point, but we only care about seeing one of them, not
2114 all of them. This rather strange convention is used because
2115 sometimes we want to know the actual index at which they first
2116 differ. */
2117static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002118
2119/* Compute an arbitrary structural (total) ordering on the two args,
2120 based on their VCs, so they can be looked up in a table, tree, etc.
2121 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00002122static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002123
florianb28fe892014-10-28 20:52:07 +00002124/* Debugging only. Display the given VTS. */
2125static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002126
2127/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00002128static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002129
sewardjffce8152011-06-24 10:09:41 +00002130/* Notify the VTS machinery that a thread has been declared
2131 comprehensively dead: that is, it has done an async exit AND it has
2132 been joined with. This should ensure that its local clocks (.viR
2133 and .viW) will never again change, and so all mentions of this
2134 thread from all VTSs in the system may be removed. */
2135static void VTS__declare_thread_very_dead ( Thr* thr );
sewardjf98e1c02008-10-25 16:22:41 +00002136
2137/*--------------- to do with Vector Timestamps ---------------*/
2138
sewardjf98e1c02008-10-25 16:22:41 +00002139static Bool is_sane_VTS ( VTS* vts )
2140{
2141 UWord i, n;
2142 ScalarTS *st1, *st2;
2143 if (!vts) return False;
sewardj555fc572011-02-27 23:39:53 +00002144 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00002145 n = vts->usedTS;
2146 if (n == 1) {
2147 st1 = &vts->ts[0];
2148 if (st1->tym == 0)
2149 return False;
2150 }
2151 else
sewardjf98e1c02008-10-25 16:22:41 +00002152 if (n >= 2) {
2153 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002154 st1 = &vts->ts[i];
2155 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00002156 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002157 return False;
2158 if (st1->tym == 0 || st2->tym == 0)
2159 return False;
2160 }
2161 }
2162 return True;
2163}
2164
2165
sewardj7aa38a92011-02-27 23:04:12 +00002166/* Create a new, empty VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002167*/
florian6bd9dc12012-11-23 16:17:43 +00002168static VTS* VTS__new ( const HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00002169{
sewardj7aa38a92011-02-27 23:04:12 +00002170 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2171 tl_assert(vts->usedTS == 0);
2172 vts->sizeTS = sizeTS;
2173 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00002174 return vts;
2175}
2176
sewardj7aa38a92011-02-27 23:04:12 +00002177/* Clone this VTS.
2178*/
florian6bd9dc12012-11-23 16:17:43 +00002179static VTS* VTS__clone ( const HChar* who, VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002180{
2181 tl_assert(vts);
2182 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2183 UInt nTS = vts->usedTS;
2184 VTS* clone = VTS__new(who, nTS);
2185 clone->id = vts->id;
2186 clone->sizeTS = nTS;
2187 clone->usedTS = nTS;
2188 UInt i;
2189 for (i = 0; i < nTS; i++) {
2190 clone->ts[i] = vts->ts[i];
2191 }
2192 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2193 return clone;
2194}
2195
sewardjf98e1c02008-10-25 16:22:41 +00002196
sewardjffce8152011-06-24 10:09:41 +00002197/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2198 must be in strictly increasing order. We could obviously do this
2199 much more efficiently (in linear time) if necessary.
2200*/
florian6bd9dc12012-11-23 16:17:43 +00002201static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
sewardjffce8152011-06-24 10:09:41 +00002202{
2203 UInt i, j;
2204 tl_assert(vts);
2205 tl_assert(thridsToDel);
2206 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2207 UInt nTS = vts->usedTS;
2208 /* Figure out how many ScalarTSs will remain in the output. */
2209 UInt nReq = nTS;
2210 for (i = 0; i < nTS; i++) {
2211 ThrID thrid = vts->ts[i].thrid;
2212 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2213 nReq--;
2214 }
2215 tl_assert(nReq <= nTS);
2216 /* Copy the ones that will remain. */
2217 VTS* res = VTS__new(who, nReq);
2218 j = 0;
2219 for (i = 0; i < nTS; i++) {
2220 ThrID thrid = vts->ts[i].thrid;
2221 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2222 continue;
2223 res->ts[j++] = vts->ts[i];
2224 }
2225 tl_assert(j == nReq);
2226 tl_assert(j == res->sizeTS);
2227 res->usedTS = j;
2228 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2229 return res;
2230}
2231
2232
sewardjf98e1c02008-10-25 16:22:41 +00002233/* Delete this VTS in its entirety.
2234*/
sewardj7aa38a92011-02-27 23:04:12 +00002235static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002236{
2237 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002238 tl_assert(vts->usedTS <= vts->sizeTS);
2239 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002240 HG_(free)(vts);
2241}
2242
2243
2244/* Create a new singleton VTS.
2245*/
sewardj7aa38a92011-02-27 23:04:12 +00002246static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2247{
sewardjf98e1c02008-10-25 16:22:41 +00002248 tl_assert(thr);
2249 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002250 tl_assert(out);
2251 tl_assert(out->usedTS == 0);
2252 tl_assert(out->sizeTS >= 1);
2253 UInt hi = out->usedTS++;
2254 out->ts[hi].thrid = Thr__to_ThrID(thr);
2255 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002256}
2257
2258
2259/* Create in 'out' a VTS in which vts[me]++, so to speak. 'vts' itself
2260   is not modified.
2261*/
sewardj7aa38a92011-02-27 23:04:12 +00002262static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002263{
sewardj7aa38a92011-02-27 23:04:12 +00002264 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002265 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002266 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002267
2268 stats__vts__tick++;
2269
sewardj7aa38a92011-02-27 23:04:12 +00002270 tl_assert(out);
2271 tl_assert(out->usedTS == 0);
2272 if (vts->usedTS >= ThrID_MAX_VALID)
2273 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2274 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2275
sewardjf98e1c02008-10-25 16:22:41 +00002276 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002277 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002278 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002279 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002280
sewardj555fc572011-02-27 23:39:53 +00002281 /* Copy all entries which precede 'me'. */
2282 for (i = 0; i < n; i++) {
2283 ScalarTS* here = &vts->ts[i];
2284 if (UNLIKELY(here->thrid >= me_thrid))
2285 break;
2286 UInt hi = out->usedTS++;
2287 out->ts[hi] = *here;
2288 }
2289
2290 /* 'i' now indicates the next entry to copy, if any.
2291 There are 3 possibilities:
2292 (a) there is no next entry (we used them all up already):
2293 add (me_thrid,1) to the output, and quit
2294 (b) there is a next entry, and its thrid > me_thrid:
2295 add (me_thrid,1) to the output, then copy the remaining entries
2296 (c) there is a next entry, and its thrid == me_thrid:
2297 copy it to the output but increment its timestamp value.
2298 Then copy the remaining entries. (c) is the common case.
2299 */
2300 tl_assert(i >= 0 && i <= n);
2301 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002302 UInt hi = out->usedTS++;
2303 out->ts[hi].thrid = me_thrid;
2304 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002305 } else {
2306 /* cases (b) and (c) */
2307 ScalarTS* here = &vts->ts[i];
2308 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002309 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002310 /* We're hosed. We have to stop. */
2311 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2312 }
sewardj7aa38a92011-02-27 23:04:12 +00002313 UInt hi = out->usedTS++;
2314 out->ts[hi].thrid = here->thrid;
2315 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002316 i++;
sewardj555fc572011-02-27 23:39:53 +00002317 found = True;
2318 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002319 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002320 out->ts[hi].thrid = me_thrid;
2321 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002322 }
sewardj555fc572011-02-27 23:39:53 +00002323 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002324 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002325 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002326 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002327 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002328 }
2329 }
sewardj555fc572011-02-27 23:39:53 +00002330
sewardj7aa38a92011-02-27 23:04:12 +00002331 tl_assert(is_sane_VTS(out));
2332 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2333 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002334}
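/* Worked examples (illustrative ThrIDs), writing "t:n" for the entry
   (thrid t, tym n):
      tick by thrid 1024 on [1024:7] gives [1024:8]          (case c)
      tick by thrid 1025 on [1024:7] gives [1024:7, 1025:1]  (case a)
      tick by thrid 1025 on [1026:4] gives [1025:1, 1026:4]  (case b)
   In all cases the result remains sorted by ThrID. */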
2335
2336
2337/* Create in 'out' a VTS constructed as the join (max) of the 2 args.
2338 Neither arg is modified.
2339*/
sewardj7aa38a92011-02-27 23:04:12 +00002340static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002341{
sewardj7aa38a92011-02-27 23:04:12 +00002342 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002343 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002344 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002345 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002346
sewardjc8028ad2010-05-05 09:34:42 +00002347 stats__vts__join++;
2348
sewardj7aa38a92011-02-27 23:04:12 +00002349 tl_assert(a);
2350 tl_assert(b);
2351 useda = a->usedTS;
2352 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002353
sewardj7aa38a92011-02-27 23:04:12 +00002354 tl_assert(out);
2355 tl_assert(out->usedTS == 0);
2356 /* overly conservative test, but doing better involves comparing
2357 the two VTSs, which we don't want to do at this point. */
2358 if (useda + usedb >= ThrID_MAX_VALID)
2359 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2360 tl_assert(out->sizeTS >= useda + usedb);
2361
sewardjf98e1c02008-10-25 16:22:41 +00002362 ia = ib = 0;
2363
2364 while (1) {
2365
sewardje4cce742011-02-24 15:25:24 +00002366 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2367 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002368 occurring in either a or b, and tyma/b are the relevant
2369 scalar timestamps, taking into account implicit zeroes. */
2370 tl_assert(ia >= 0 && ia <= useda);
2371 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002372
njn4c245e52009-03-15 23:25:38 +00002373 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002374 /* both empty - done */
2375 break;
njn4c245e52009-03-15 23:25:38 +00002376
2377 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002378 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002379 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002380 thrid = tmpb->thrid;
2381 tyma = 0;
2382 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002383 ib++;
njn4c245e52009-03-15 23:25:38 +00002384
2385 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002386 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002387 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002388 thrid = tmpa->thrid;
2389 tyma = tmpa->tym;
2390 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002391 ia++;
njn4c245e52009-03-15 23:25:38 +00002392
2393 } else {
sewardje4cce742011-02-24 15:25:24 +00002394 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002395 ScalarTS* tmpa = &a->ts[ia];
2396 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002397 if (tmpa->thrid < tmpb->thrid) {
2398 /* a has the lowest unconsidered ThrID */
2399 thrid = tmpa->thrid;
2400 tyma = tmpa->tym;
2401 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002402 ia++;
sewardje4cce742011-02-24 15:25:24 +00002403 } else if (tmpa->thrid > tmpb->thrid) {
2404 /* b has the lowest unconsidered ThrID */
2405 thrid = tmpb->thrid;
2406 tyma = 0;
2407 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002408 ib++;
2409 } else {
sewardje4cce742011-02-24 15:25:24 +00002410 /* they both next mention the same ThrID */
2411 tl_assert(tmpa->thrid == tmpb->thrid);
2412 thrid = tmpa->thrid; /* == tmpb->thrid */
2413 tyma = tmpa->tym;
2414 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002415 ia++;
2416 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002417 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002418 }
2419 }
2420
2421 /* having laboriously determined (thr, tyma, tymb), do something
2422 useful with it. */
2423 tymMax = tyma > tymb ? tyma : tymb;
2424 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002425 UInt hi = out->usedTS++;
2426 out->ts[hi].thrid = thrid;
2427 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002428 }
2429
2430 }
2431
sewardj7aa38a92011-02-27 23:04:12 +00002432 tl_assert(is_sane_VTS(out));
2433 tl_assert(out->usedTS <= out->sizeTS);
2434 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002435}
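/* A worked example (illustrative ThrIDs): joining [1024:3, 1025:5]
   with [1024:4, 1026:2] gives [1024:4, 1025:5, 1026:2] -- a
   pointwise maximum in which missing entries are treated as zero and
   zero maxima are omitted from the result. */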
2436
2437
sewardje4cce742011-02-24 15:25:24 +00002438/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
2439   so, or the first ThrID at which the ordering fails (no valid ThrID
2440 has the value zero). This rather strange convention is used
2441 because sometimes we want to know the actual index at which they
2442 first differ. */
2443static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002444{
sewardj23f12002009-07-24 08:45:08 +00002445 Word ia, ib, useda, usedb;
2446 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002447
sewardjc8028ad2010-05-05 09:34:42 +00002448 stats__vts__cmpLEQ++;
2449
sewardj7aa38a92011-02-27 23:04:12 +00002450 tl_assert(a);
2451 tl_assert(b);
2452 useda = a->usedTS;
2453 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002454
2455 ia = ib = 0;
2456
2457 while (1) {
2458
njn4c245e52009-03-15 23:25:38 +00002459 /* This logic is to enumerate doubles (tyma, tymb) drawn
2460 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002461 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002462 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002463
sewardjf98e1c02008-10-25 16:22:41 +00002464 tl_assert(ia >= 0 && ia <= useda);
2465 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002466
njn4c245e52009-03-15 23:25:38 +00002467 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002468 /* both empty - done */
2469 break;
njn4c245e52009-03-15 23:25:38 +00002470
2471 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002472 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002473 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002474 tyma = 0;
2475 tymb = tmpb->tym;
2476 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002477 ib++;
njn4c245e52009-03-15 23:25:38 +00002478
2479 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002480 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002481 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002482 tyma = tmpa->tym;
2483 thrid = tmpa->thrid;
2484 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002485 ia++;
njn4c245e52009-03-15 23:25:38 +00002486
2487 } else {
sewardje4cce742011-02-24 15:25:24 +00002488 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002489 ScalarTS* tmpa = &a->ts[ia];
2490 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002491 if (tmpa->thrid < tmpb->thrid) {
2492 /* a has the lowest unconsidered ThrID */
2493 tyma = tmpa->tym;
2494 thrid = tmpa->thrid;
2495 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002496 ia++;
2497 }
2498 else
sewardje4cce742011-02-24 15:25:24 +00002499 if (tmpa->thrid > tmpb->thrid) {
2500 /* b has the lowest unconsidered ThrID */
2501 tyma = 0;
2502 tymb = tmpb->tym;
2503 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002504 ib++;
2505 } else {
sewardje4cce742011-02-24 15:25:24 +00002506 /* they both next mention the same ThrID */
2507 tl_assert(tmpa->thrid == tmpb->thrid);
2508 tyma = tmpa->tym;
2509 thrid = tmpa->thrid;
2510 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002511 ia++;
2512 ib++;
2513 }
2514 }
2515
njn4c245e52009-03-15 23:25:38 +00002516 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002517 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002518 if (tyma > tymb) {
2519 /* not LEQ at this index. Quit, since the answer is
2520 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002521 tl_assert(thrid >= 1024);
2522 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002523 }
sewardjf98e1c02008-10-25 16:22:41 +00002524 }
2525
sewardje4cce742011-02-24 15:25:24 +00002526 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002527}
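/* Worked examples (illustrative ThrIDs):
      VTS__cmpLEQ([1024:2, 1025:1], [1024:3, 1025:1]) == 0
         (every component of 'a' is <= the corresponding one in 'b')
      VTS__cmpLEQ([1024:3, 1025:1], [1024:2, 1025:1]) == 1024
         (a[1024] == 3 > 2 == b[1024], so 1024 is the witness) */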
2528
2529
2530/* Compute an arbitrary structural (total) ordering on the two args,
2531 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002532 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2533 performance critical so there is some effort expended to make it sa
2534 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002535*/
2536Word VTS__cmp_structural ( VTS* a, VTS* b )
2537{
2538 /* We just need to generate an arbitrary total ordering based on
2539   a->ts and b->ts. Preferably do it in a way which detects likely
2540 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002541 Word i;
2542 Word useda = 0, usedb = 0;
2543 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002544
sewardjc8028ad2010-05-05 09:34:42 +00002545 stats__vts__cmp_structural++;
2546
2547 tl_assert(a);
2548 tl_assert(b);
2549
sewardj7aa38a92011-02-27 23:04:12 +00002550 ctsa = &a->ts[0]; useda = a->usedTS;
2551 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002552
2553 if (LIKELY(useda == usedb)) {
2554 ScalarTS *tmpa = NULL, *tmpb = NULL;
2555 stats__vts__cmp_structural_slow++;
2556 /* Same length vectors. Find the first difference, if any, as
2557 fast as possible. */
2558 for (i = 0; i < useda; i++) {
2559 tmpa = &ctsa[i];
2560 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002561 if (LIKELY(tmpa->tym == tmpb->tym
2562 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002563 continue;
2564 else
2565 break;
2566 }
2567 if (UNLIKELY(i == useda)) {
2568 /* They're identical. */
2569 return 0;
2570 } else {
2571 tl_assert(i >= 0 && i < useda);
2572 if (tmpa->tym < tmpb->tym) return -1;
2573 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002574 if (tmpa->thrid < tmpb->thrid) return -1;
2575 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002576 /* we just established them as non-identical, hence: */
2577 }
2578 /*NOTREACHED*/
2579 tl_assert(0);
2580 }
sewardjf98e1c02008-10-25 16:22:41 +00002581
2582 if (useda < usedb) return -1;
2583 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002584 /*NOTREACHED*/
2585 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002586}
2587
2588
florianb28fe892014-10-28 20:52:07 +00002589/* Debugging only. Display the given VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002590*/
florianb28fe892014-10-28 20:52:07 +00002591static void VTS__show ( const VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002592{
sewardjf98e1c02008-10-25 16:22:41 +00002593 Word i, n;
florian4367abe2015-02-28 09:22:09 +00002594 tl_assert(vts);
florianb28fe892014-10-28 20:52:07 +00002595
2596 VG_(printf)("[");
sewardj7aa38a92011-02-27 23:04:12 +00002597 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002598 for (i = 0; i < n; i++) {
florianb28fe892014-10-28 20:52:07 +00002599 const ScalarTS *st = &vts->ts[i];
2600 VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002601 }
florianb28fe892014-10-28 20:52:07 +00002602 VG_(printf)("]");
sewardjf98e1c02008-10-25 16:22:41 +00002603}
2604
2605
2606/* Debugging only. Return vts[index], so to speak.
2607*/
sewardj7aa38a92011-02-27 23:04:12 +00002608ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2609{
sewardjf98e1c02008-10-25 16:22:41 +00002610 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002611 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002612 stats__vts__indexat_slow++;
florian4367abe2015-02-28 09:22:09 +00002613 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002614 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002615 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002616 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002617 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002618 return st->tym;
2619 }
2620 return 0;
2621}
2622
2623
sewardjffce8152011-06-24 10:09:41 +00002624/* See comment on prototype above.
2625*/
2626static void VTS__declare_thread_very_dead ( Thr* thr )
2627{
2628 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2629
2630 tl_assert(thr->llexit_done);
2631 tl_assert(thr->joinedwith_done);
2632
2633 ThrID nyu;
2634 nyu = Thr__to_ThrID(thr);
philippec3508652015-03-28 12:01:58 +00002635 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
sewardjffce8152011-06-24 10:09:41 +00002636
2637 /* We can only get here if we're assured that we'll never again
2638 need to look at this thread's ::viR or ::viW. Set them to
2639 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2640 mostly so that we don't wind up pruning them (as that would be
2641 nonsensical: the only interesting ScalarTS entry for a dead
2642 thread is its own index, and the pruning will remove that.). */
2643 VtsID__rcdec(thr->viR);
2644 VtsID__rcdec(thr->viW);
2645 thr->viR = VtsID_INVALID;
2646 thr->viW = VtsID_INVALID;
2647}
2648
2649
sewardjf98e1c02008-10-25 16:22:41 +00002650/////////////////////////////////////////////////////////////////
2651/////////////////////////////////////////////////////////////////
2652// //
2653// SECTION END vts primitives //
2654// //
2655/////////////////////////////////////////////////////////////////
2656/////////////////////////////////////////////////////////////////
2657
2658
2659
2660/////////////////////////////////////////////////////////////////
2661/////////////////////////////////////////////////////////////////
2662// //
2663// SECTION BEGIN main library //
2664// //
2665/////////////////////////////////////////////////////////////////
2666/////////////////////////////////////////////////////////////////
2667
2668
2669/////////////////////////////////////////////////////////
2670// //
2671// VTS set //
2672// //
2673/////////////////////////////////////////////////////////
2674
sewardjffce8152011-06-24 10:09:41 +00002675static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002676
2677static void vts_set_init ( void )
2678{
2679 tl_assert(!vts_set);
2680 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2681 HG_(free),
2682 (Word(*)(UWord,UWord))VTS__cmp_structural );
sewardjf98e1c02008-10-25 16:22:41 +00002683}
2684
sewardj7aa38a92011-02-27 23:04:12 +00002685/* Given a VTS, look in vts_set to see if we already have a
2686 structurally identical one. If yes, return the pair (True, pointer
2687 to the existing one). If no, clone this one, add the clone to the
2688 set, and return (False, pointer to the clone). */
2689static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002690{
2691 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002692 stats__vts_set__focaa++;
2693 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002694 /* lookup cand (by value) */
2695 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2696 /* found it */
2697 tl_assert(valW == 0);
2698 /* if this fails, cand (by ref) was already present (!) */
2699 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002700 *res = (VTS*)keyW;
2701 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002702 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002703 /* not present. Clone, add and return address of clone. */
2704 stats__vts_set__focaa_a++;
2705 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2706 tl_assert(clone != cand);
2707 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2708 *res = clone;
2709 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002710 }
2711}
2712
2713
2714/////////////////////////////////////////////////////////
2715// //
2716// VTS table //
2717// //
2718/////////////////////////////////////////////////////////
2719
2720static void VtsID__invalidate_caches ( void ); /* fwds */
2721
2722/* A type to hold VTS table entries. Invariants:
2723 If .vts == NULL, then this entry is not in use, so:
2724 - .rc == 0
2725 - this entry is on the freelist (unfortunately, does not imply
philippea1ac2f42015-05-01 17:12:00 +00002726 any constraints on value for u.freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002727 If .vts != NULL, then this entry is in use:
2728 - .vts is findable in vts_set
2729 - .vts->id == this entry number
2730 - no specific value for .rc (even 0 is OK)
philippea1ac2f42015-05-01 17:12:00 +00002731 - this entry is not on freelist, so u.freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002732*/
2733typedef
2734 struct {
2735 VTS* vts; /* vts, in vts_set */
2736 UWord rc; /* reference count - enough for entire aspace */
philippea1ac2f42015-05-01 17:12:00 +00002737 union {
2738 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2739 VtsID remap; /* used only during pruning, for used entries */
2740 } u;
2741 /* u.freelink only used when vts == NULL,
2742 u.remap only used when vts != NULL, during pruning. */
sewardjf98e1c02008-10-25 16:22:41 +00002743 }
2744 VtsTE;
2745
2746/* The VTS table. */
2747static XArray* /* of VtsTE */ vts_tab = NULL;
2748
2749/* An index into the VTS table, indicating the start of the list of
2750 free (available for use) entries. If the list is empty, this is
2751 VtsID_INVALID. */
2752static VtsID vts_tab_freelist = VtsID_INVALID;
2753
2754/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2755 vts_tab equals or exceeds this size. After GC, the value here is
2756 set appropriately so as to check for the next GC point. */
2757static Word vts_next_GC_at = 1000;
2758
2759static void vts_tab_init ( void )
2760{
florian91ed8cc2014-09-15 18:50:17 +00002761 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2762 HG_(free), sizeof(VtsTE) );
2763 vts_tab_freelist = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002764}
2765
2766/* Add ii to the free list, checking that it looks out-of-use. */
2767static void add_to_free_list ( VtsID ii )
2768{
2769 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2770 tl_assert(ie->vts == NULL);
2771 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002772 tl_assert(ie->u.freelink == VtsID_INVALID);
2773 ie->u.freelink = vts_tab_freelist;
sewardjf98e1c02008-10-25 16:22:41 +00002774 vts_tab_freelist = ii;
2775}
2776
2777/* Get an entry from the free list. This will return VtsID_INVALID if
2778 the free list is empty. */
2779static VtsID get_from_free_list ( void )
2780{
2781 VtsID ii;
2782 VtsTE* ie;
2783 if (vts_tab_freelist == VtsID_INVALID)
2784 return VtsID_INVALID;
2785 ii = vts_tab_freelist;
2786 ie = VG_(indexXA)( vts_tab, ii );
2787 tl_assert(ie->vts == NULL);
2788 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002789 vts_tab_freelist = ie->u.freelink;
sewardjf98e1c02008-10-25 16:22:41 +00002790 return ii;
2791}
2792
2793/* Produce a new VtsID that can be used, either by getting it from
2794 the freelist, or, if that is empty, by expanding vts_tab. */
2795static VtsID get_new_VtsID ( void )
2796{
2797 VtsID ii;
2798 VtsTE te;
2799 ii = get_from_free_list();
2800 if (ii != VtsID_INVALID)
2801 return ii;
2802 te.vts = NULL;
2803 te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002804 te.u.freelink = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002805 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2806 return ii;
2807}
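/* An illustrative lifecycle (toy_vtsid_lifecycle is hypothetical and
   never compiled): */
#if 0
static void toy_vtsid_lifecycle ( void )
{
   VtsID  ii = get_new_VtsID();  /* pops the freelist, else grows vts_tab */
   VtsTE* te = VG_(indexXA)( vts_tab, ii );
   tl_assert(te->vts == NULL && te->rc == 0);
   /* ... the caller installs a VTS and manages te->rc via
      VtsID__rcinc/VtsID__rcdec; once rc reaches zero, a later
      vts_tab GC deletes the VTS and calls add_to_free_list(ii). */
}
#endif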
2808
2809
2810/* Indirect callback from lib_zsm. */
2811static void VtsID__rcinc ( VtsID ii )
2812{
2813 VtsTE* ie;
2814 /* VG_(indexXA) does a range check for us */
2815 ie = VG_(indexXA)( vts_tab, ii );
2816 tl_assert(ie->vts); /* else it's not in use */
2817 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2818 tl_assert(ie->vts->id == ii);
2819 ie->rc++;
2820}
2821
2822/* Indirect callback from lib_zsm. */
2823static void VtsID__rcdec ( VtsID ii )
2824{
2825 VtsTE* ie;
2826 /* VG_(indexXA) does a range check for us */
2827 ie = VG_(indexXA)( vts_tab, ii );
2828 tl_assert(ie->vts); /* else it's not in use */
2829 tl_assert(ie->rc > 0); /* else RC snafu */
2830 tl_assert(ie->vts->id == ii);
2831 ie->rc--;
2832}
2833
2834
sewardj7aa38a92011-02-27 23:04:12 +00002835/* Look up 'cand' in our collection of VTSs. If present, return the
2836 VtsID for the pre-existing version. If not present, clone it, add
2837 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2838 it, and return that. */
2839static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002840{
sewardj7aa38a92011-02-27 23:04:12 +00002841 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002842 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002843 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2844 tl_assert(in_tab);
2845 if (already_have) {
2846 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002847 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002848 tl_assert(in_tab->id != VtsID_INVALID);
2849 ie = VG_(indexXA)( vts_tab, in_tab->id );
2850 tl_assert(ie->vts == in_tab);
2851 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002852 } else {
2853 VtsID ii = get_new_VtsID();
2854 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002855 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002856 ie->rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002857 ie->u.freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002858 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002859 return ii;
2860 }
2861}
2862
2863
florian6bd9dc12012-11-23 16:17:43 +00002864static void show_vts_stats ( const HChar* caller )
sewardjf98e1c02008-10-25 16:22:41 +00002865{
2866 UWord nSet, nTab, nLive;
2867 ULong totrc;
2868 UWord n, i;
2869 nSet = VG_(sizeFM)( vts_set );
2870 nTab = VG_(sizeXA)( vts_tab );
2871 totrc = 0;
2872 nLive = 0;
2873 n = VG_(sizeXA)( vts_tab );
2874 for (i = 0; i < n; i++) {
2875 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2876 if (ie->vts) {
2877 nLive++;
2878 totrc += (ULong)ie->rc;
2879 } else {
2880 tl_assert(ie->rc == 0);
2881 }
2882 }
2883 VG_(printf)(" show_vts_stats %s\n", caller);
2884 VG_(printf)(" vts_tab size %4lu\n", nTab);
2885 VG_(printf)(" vts_tab live %4lu\n", nLive);
2886 VG_(printf)(" vts_set size %4lu\n", nSet);
2887 VG_(printf)(" total rc %4llu\n", totrc);
2888}
2889
sewardjffce8152011-06-24 10:09:41 +00002890
2891/* --- Helpers for VtsID pruning --- */
2892
2893static
2894void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2895 /*MOD*/XArray* /* of VtsTE */ new_tab,
2896 VtsID* ii )
2897{
2898 VtsTE *old_te, *new_te;
2899 VtsID old_id, new_id;
2900 /* We're relying here on VG_(indexXA)'s range checking to assert on
2901 any stupid values, in particular *ii == VtsID_INVALID. */
2902 old_id = *ii;
2903 old_te = VG_(indexXA)( old_tab, old_id );
2904 old_te->rc--;
philippea1ac2f42015-05-01 17:12:00 +00002905 new_id = old_te->u.remap;
sewardjffce8152011-06-24 10:09:41 +00002906 new_te = VG_(indexXA)( new_tab, new_id );
2907 new_te->rc++;
2908 *ii = new_id;
2909}
2910
2911static
2912void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2913 /*MOD*/XArray* /* of VtsTE */ new_tab,
2914 SVal* s )
2915{
2916 SVal old_sv, new_sv;
2917 old_sv = *s;
2918 if (SVal__isC(old_sv)) {
2919 VtsID rMin, wMin;
2920 rMin = SVal__unC_Rmin(old_sv);
2921 wMin = SVal__unC_Wmin(old_sv);
2922 remap_VtsID( old_tab, new_tab, &rMin );
2923 remap_VtsID( old_tab, new_tab, &wMin );
2924 new_sv = SVal__mkC( rMin, wMin );
2925 *s = new_sv;
2926 }
2927}


/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   stats__vts_tab_GC++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
                   stats__vts_tab_GC,
                   nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;
   /* No need to do pruning if no thread died since the last pruning, as
      in that case no VtsTE can be pruned. */
   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
      return;

   /* ---------- BEGIN VTS PRUNING ---------- */
   /* Sort and check the very dead threads that died since the last pruning.
      Sorting is used for the check and so that we can quickly look
      up the dead-thread entries as we work through the VTSs. */
   verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);

   /* We will run through the old table, and create a new table and
      set, at the same time setting the u.remap entries in the old
      table to point to the new entries.  Then, visit every VtsID in
      the system, and replace all of them with new ones, using the
      u.remap entries in the old table.  Finally, we can delete the old
      table and set. */

   XArray* /* of VtsTE */ new_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
                    HG_(free), sizeof(VtsTE) );

   /* WordFM VTS* void */
   WordFM* new_set
      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
                    HG_(free),
                    (Word(*)(UWord,UWord))VTS__cmp_structural );

   /* Visit each old VTS.  For each one:

      * make a pruned version

      * search new_set for the pruned version, yielding either
        Nothing (not present) or the new VtsID for it.

      * if not present, allocate a new VtsID for it, insert (pruned
        VTS, new VtsID) in the tree, and set
        remap_table[old VtsID] = new VtsID.

      * if present, set remap_table[old VtsID] = new VtsID, where
        new VtsID was determined by the tree lookup.  Then free up
        the clone.
   */

   UWord nBeforePruning = 0, nAfterPruning = 0;
   UWord nSTSsBefore = 0, nSTSsAfter = 0;
   VtsID new_VtsID_ctr = 0;

   for (i = 0; i < nTab; i++) {

      /* For each old VTS .. */
      VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
      VTS*   old_vts = old_te->vts;

      /* Skip it if not in use */
      if (old_te->rc == 0) {
         tl_assert(old_vts == NULL);
         continue;
      }
      tl_assert(old_te->u.remap == VtsID_INVALID);
      tl_assert(old_vts != NULL);
      tl_assert(old_vts->id == i);
      tl_assert(old_vts->ts != NULL);

      /* It is in use. Make a pruned version. */
      nBeforePruning++;
      nSTSsBefore += old_vts->usedTS;
      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
                                   old_vts, verydead_thread_table_not_pruned);
      tl_assert(new_vts->sizeTS == new_vts->usedTS);
      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
                == 0x0ddC0ffeeBadF00dULL);

      /* Get rid of the old VTS and the tree entry.  It's a bit more
         complex to incrementally delete the VTSs now than to nuke
         them all after we're done, but the upside is that we don't
         wind up temporarily storing potentially two complete copies
         of each VTS and hence spiking memory use. */
      UWord oldK = 0, oldV = 12345;
      Bool present = VG_(delFromFM)( vts_set,
                                     &oldK, &oldV, (UWord)old_vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(old_vts);
      old_te->vts = NULL;
      old_vts = NULL;

      /* NO MENTIONS of old_vts allowed beyond this point. */

      /* Ok, we have the pruned copy in new_vts.  See if a
         structurally identical version is already present in new_set.
         If so, delete the one we just made and move on; if not, add
         it. */
      VTS*  identical_version = NULL;
      UWord valW = 12345;
      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
                        (UWord)new_vts)) {
         // already have it
         tl_assert(valW == 0);
         tl_assert(identical_version != NULL);
         tl_assert(identical_version != new_vts);
         VTS__delete(new_vts);
         new_vts = identical_version;
         tl_assert(new_vts->id != VtsID_INVALID);
      } else {
         tl_assert(valW == 12345);
         tl_assert(identical_version == NULL);
         new_vts->id = new_VtsID_ctr++;
         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
         tl_assert(!b);
         VtsTE new_te;
         new_te.vts        = new_vts;
         new_te.rc         = 0;
         new_te.u.freelink = VtsID_INVALID;
         Word j = VG_(addToXA)( new_tab, &new_te );
         tl_assert(j <= i);
         tl_assert(j == new_VtsID_ctr - 1);
         // stats
         nAfterPruning++;
         nSTSsAfter += new_vts->usedTS;
      }
      old_te->u.remap = new_vts->id;

   } /* for (i = 0; i < nTab; i++) */

   /* Move the very dead threads from verydead_thread_table_not_pruned to
      verydead_thread_table.  Sort and check verydead_thread_table
      to verify a thread was reported very dead only once. */
   {
      UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);

      for (i = 0; i < nBT; i++) {
         ThrID thrid =
            *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
         VG_(addToXA)( verydead_thread_table, &thrid );
      }
      verydead_thread_table_sort_and_check (verydead_thread_table);
      VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
   }

   /* At this point, we have:
      * the old VTS table, with its u.remap entries set,
        and with all .vts == NULL.
      * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted as we worked
        through the old table.
      * the new VTS table, with all .rc == 0, all u.freelink and u.remap
        == VtsID_INVALID.
      * the new VTS tree.
   */
   tl_assert( VG_(sizeFM)(vts_set) == 0 );

   /* Now actually apply the mapping. */
   /* Visit all the VtsIDs in the entire system.  Where do we expect
      to find them?
      (a) in shadow memory -- the LineZs and LineFs
      (b) in our collection of struct _Thrs.
      (c) in our collection of struct _SOs.
      Nowhere else, AFAICS.  Not in the zsm cache, because that just
      got invalidated.

      Using the u.remap fields in vts_tab, map each old VtsID to a new
      VtsID.  For each old VtsID, dec its rc; and for each new one,
      inc it.  This sets up the new refcounts, and it also gives a
      cheap sanity check of the old ones: all old refcounts should be
      zero after this operation.
   */

   /* Do the mappings for (a) above: iterate over the Primary shadow
      mem map (WordFM Addr SecMap*). */
   UWord secmapW = 0;
   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] == SVal_INVALID)
            continue; /* not in use -- data is in F rep instead */
         for (j = 0; j < 4; j++)
            remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
      }
      /* Deal with the LineFs */
      for (i = 0; i < sm->linesF_size; i++) {
         LineF* lineF = &sm->linesF[i];
         if (!lineF->inUse)
            continue;
         for (j = 0; j < N_LINE_ARANGE; j++)
            remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
      }
   }
   VG_(doneIterFM)( map_shmem );

   /* Do the mappings for (b) above: visit our collection of struct
      _Thrs. */
   Thread* hgthread = get_admin_threads();
   tl_assert(hgthread);
   while (hgthread) {
      Thr* hbthr = hgthread->hbthr;
      tl_assert(hbthr);
      /* Threads that are listed in the prunable set have their viR
         and viW set to VtsID_INVALID, so we can't mess with them. */
      if (hbthr->llexit_done && hbthr->joinedwith_done) {
         tl_assert(hbthr->viR == VtsID_INVALID);
         tl_assert(hbthr->viW == VtsID_INVALID);
         hgthread = hgthread->admin;
         continue;
      }
      remap_VtsID( vts_tab, new_tab, &hbthr->viR );
      remap_VtsID( vts_tab, new_tab, &hbthr->viW );
      hgthread = hgthread->admin;
   }

   /* Do the mappings for (c) above: visit the struct _SOs. */
   SO* so = admin_SO;
   while (so) {
      if (so->viR != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viR );
      if (so->viW != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viW );
      so = so->admin_next;
   }

   /* So, we're nearly done (with this incredibly complex operation).
      Check the refcounts for the old VtsIDs all fell to zero, as
      expected.  Any failure is serious. */
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts == NULL);
      /* This is the assert proper.  Note we're also asserting
         zeroness for old entries which are unmapped.  That's OK. */
      tl_assert(te->rc == 0);
   }

   /* Install the new table and set. */
   VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
   vts_set = new_set;
   VG_(deleteXA)( vts_tab );
   vts_tab = new_tab;

   /* The freelist of vts_tab entries is empty now, because we've
      compacted all of the live entries at the low end of the
      table. */
   vts_tab_freelist = VtsID_INVALID;

   /* Sanity check vts_set and vts_tab. */

   /* Because all the live entries got slid down to the bottom of vts_tab: */
   tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));

   /* Assert that the vts_tab and vts_set entries point at each other
      in the required way */
   UWord wordK = 0, wordV = 0;
   VG_(initIterFM)( vts_set );
   while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
      tl_assert(wordK != 0);
      tl_assert(wordV == 0);
      VTS* vts = (VTS*)wordK;
      tl_assert(vts->id != VtsID_INVALID);
      VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
      tl_assert(te->vts == vts);
   }
   VG_(doneIterFM)( vts_set );

   /* Also iterate over the table, and check each entry is
      plausible. */
   nTab = VG_(sizeXA)( vts_tab );
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts);
      tl_assert(te->vts->id == i);
      tl_assert(te->rc > 0); /* 'cos we just GC'd */
      tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
      /* value of te->u.remap not relevant */
   }

   /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
   stats__vts_pruning++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(
         Vg_DebugMsg,
         "libhb: VTS PR: #%lu before %lu (avg sz %lu) "
         "after %lu (avg sz %lu)\n",
         stats__vts_pruning,
         nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
         nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
      );
   }
   /* ---------- END VTS PRUNING ---------- */
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Vts IDs                       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A max-sized VTS which is used as scratch space (the first, output
   argument) in VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}
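
/* The caches below are direct-mapped: hash_VtsIDs picks a single
   slot per (vi1,vi2) pair and a colliding query simply overwrites
   whatever was there -- there is no chaining.  So the only
   requirement on the hash is that it spreads pairs evenly, which
   the two different rotations achieve cheaply. */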

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( vts );
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}
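
/* Note that both wrappers above special-case vi1 == vi2 inline:
   identical IDs are by far the commonest query, and e.g.
   VtsID__join2(v, v) just returns v without touching join2_cache
   or the underlying VTSs. */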

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
   any, really) element in vi1 which is pointwise greater-than the
   corresponding element in vi2.  If no such element exists, return
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}
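
/* The tag value 1 works as "cannot match" because real tags are
   produced by FI_GET_TAG, which yields line-aligned addresses; 1 is
   not aligned to FI_LINE_SZB, so no lookup can ever hit it. */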

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}
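
/* Layout recap, implied by the masks above: a filter line covers 32
   bytes via four 16-bit words; each byte is tracked by a 2-bit (R,W)
   pair, so u16s[loff] holds the pairs for the 8 bytes starting at
   atag + 8*loff, and byte 'a' owns the pair at bit 2*(a & 7). */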

/* Only used to verify the fast Filter__clear_range */
__attribute__((unused))
static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
{
   tl_assert (CHECK_ZSM);

   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
#  if CHECK_ZSM > 0
   /* We cross-check the more complex algorithm below against the
      simple one.  This check is very expensive: we first do the
      clear the slow way on a copy of the data, then do it the fast
      way.  On RETURN, we check the two results are equal. */
   Filter fi_check = *fi;
   Filter__clear_range_SLOW(&fi_check, a, len);
#  define RETURN goto check_and_return
#  else
#  define RETURN return
#  endif

   Addr begtag = FI_GET_TAG(a);       /* tag of range begin */

   Addr end = a + len - 1;
   Addr endtag = FI_GET_TAG(end);     /* tag of range end. */

   UWord rlen = len;                  /* remaining length to clear */

   Addr    c = a;                      /* Current position we are clearing. */
   UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
   FiLine* cline;                      /* Current line we are clearing */
   UWord   cloff;                      /* Current offset in line we are
                                          clearing, when clearing partial
                                          lines. */

   UShort u16;

   STATIC_ASSERT (FI_LINE_SZB == 32);
   // Below assumes filter lines are 32 bytes

   if (LIKELY(fi->tags[clineno] == begtag)) {
      /* LIKELY for the heavy caller VG_(unknown_SP_update). */
      /* First filter line matches begtag.
         If c is not at the filter line begin, the below will clear
         the filter line bytes starting from c. */
      cline = &fi->lines[clineno];
      cloff = (c - begtag) / 8;

      /* First the byte(s) needed to reach 8-alignment */
      if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
         /* hiB is the nr of bytes (higher addresses) from c to reach
            8-alignment. */
         UWord hiB = 8 - (c & 7);
         /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
            mask is  C000, F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
            range    7..7  6..7  5..7  4..7  3..7  2..7  1..7 */
         UShort mask = 0xFFFF << (16 - 2*hiB);

         u16 = cline->u16s[cloff];
         if (LIKELY(rlen >= hiB)) {
            cline->u16s[cloff] = u16 & ~mask; /* clear all hiB from c */
            rlen -= hiB;
            c += hiB;
            cloff += 1;
         } else {
            /* Only have the bits for rlen bytes. */
            mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
            cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
            RETURN;  // We have cleared all we can.
         }
      }
      /* c is now 8 aligned.  Clear by 8 aligned bytes,
         till c is filter-line aligned */
      while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
   } else {
      c = begtag + FI_LINE_SZB;
      if (c > end)
         RETURN;   // We have cleared all we can.
      rlen -= c - a;
   }
   // We have changed c, so re-establish clineno.
   clineno = FI_GET_LINENO(c);

   if (rlen >= FI_LINE_SZB) {
      /* Here, c is filter line-aligned.  Clear all full lines that
         overlap with the range starting at c. */
      UWord nfull = rlen / FI_LINE_SZB;
      UWord full_len = nfull * FI_LINE_SZB;
      rlen -= full_len;
      if (nfull > FI_NUM_LINES)
         nfull = FI_NUM_LINES; // no need to check the same entry several times.

      for (UWord n = 0; n < nfull; n++) {
         if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
            cline = &fi->lines[clineno];
            cline->u16s[0] = 0;
            cline->u16s[1] = 0;
            cline->u16s[2] = 0;
            cline->u16s[3] = 0;
            STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
         }
         clineno++;
         if (UNLIKELY(clineno == FI_NUM_LINES))
            clineno = 0;
      }

      c += full_len;
      clineno = FI_GET_LINENO(c);
   }

   if (CHECK_ZSM) {
      tl_assert(VG_IS_8_ALIGNED(c));
      tl_assert(clineno == FI_GET_LINENO(c));
   }

   /* Do the last filter line, if it was not cleared as a full filter line */
   if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
      cline = &fi->lines[clineno];
      cloff = (c - endtag) / 8;
      if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);

      /* c is 8 aligned.  Clear by 8 aligned bytes, till we have less than
         8 bytes. */
      while (rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
      /* Then the remaining byte(s) */
      if (rlen > 0) {
         /* nr of bytes from c to reach end. */
         UWord loB = rlen;
         /* Compute mask representing loB bytes [c..c+loB[ :
            mask is 0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
         UShort mask = 0xFFFF >> (16 - 2*loB);

         u16 = cline->u16s[cloff];
         cline->u16s[cloff] = u16 & ~mask; /* clear all loB from c */
      }
   }

#  if CHECK_ZSM > 0
  check_and_return:
   tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
#  endif
#  undef RETURN
}
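
/* Worked example (hypothetical addresses, not from the sources):
   clearing [0x1005, 0x1005+72) first clears the 3 bytes up to 0x1008
   with a leading mask, then three 8-byte chunks up to the line
   boundary 0x1020, then one full 32-byte line [0x1020, 0x1040), then
   one more 8-byte chunk at 0x1040, and finally the remaining 5 bytes
   with the trailing mask 0x03FF -- the same result the SLOW variant
   reaches byte by byte. */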

/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16 = line->u16s[loff];
         Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Threads                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
                                       const ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                    Shadow Values                    //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
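
/* Worked example (hypothetical VtsIDs): SVal__mkC(3, 7) yields
   0x0000000300000007ULL; the top two bits are 00, so SVal__isC holds
   and SVal__unC_Rmin/SVal__unC_Wmin recover 3 and 7.  Per the
   diagram above, the top two bits of each 32-bit half are reserved
   for tagging, so VtsIDs are effectively 30-bit values. */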

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
__attribute__((unused))
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                  Change-event map2                  //
//                                                     //
/////////////////////////////////////////////////////////

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef in the SparseWA is also on a doubly
      linked list maintaining the order in which the OldRefs were most
      recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is a MTF-array.  If a binding
      falls off the end, that's too bad -- we will lose info about
      that triple's access to this location.

      We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
      Then we do exact LRU discarding.  For each discarded OldRef we
      must of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is the fixed size of this array.  I suspect for
   most OldRefs, only a few entries are used, but for a minority of
   cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a PoolAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1 = 0;
static UWord stats__ctxt_rcdec2 = 0;
static UWord stats__ctxt_rcdec3 = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): A hash table of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash; /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

//////////// BEGIN RCEC pool allocator
static PoolAlloc* rcec_pool_allocator;
static RCEC* alloc_RCEC ( void ) {
   return VG_(allocEltPA) ( rcec_pool_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
//////////// END RCEC pool allocator

static RCEC** contextTab = NULL; /* hash table of RCEC*s */

/* Count of allocated RCEC having ref count > 0 */
static UWord RCEC_referenced = 0;

/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return 1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return 1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
   if (ec->rc == 0)
      RCEC_referenced--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   if (ec->rc == 0)
      RCEC_referenced++;
   ec->rc++;
}


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
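
/* Design note: moving a hit entry only one step towards the head,
   rather than fully to the front, still migrates frequently-used
   RCECs towards the start of their hash chain, but costs at most one
   pointer swap per hit and stops a single unlucky lookup from
   reshuffling the whole chain. */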


/* Find the given RCEC in the tree, and return a pointer to it.  Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy.  The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}

static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);
   w = (w << n) | (w >> (bpw-n));
   return w;
}

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc = 0;
   example.rcX = 0;
   example.next = NULL;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}
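
/* The rotate-and-xor loop above is a cheap mixing step: rotating by
   19 bits between xors spreads each frame address across the whole
   word, so stacks differing in only one frame still tend to land in
   different contextTab buckets.  Caching the result in frames_hash
   also lets RCEC__cmp_by_frames reject most unequal candidates with
   a single word compare. */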
4403
4404///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004405//// Part (2):
4406/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004407///
4408
sewardjffce8152011-06-24 10:09:41 +00004409/* Records an access: a thread, a context (size & writeness) and the
4410 number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 =
4411 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004412*/
sewardjffce8152011-06-24 10:09:41 +00004413typedef
4414 struct {
4415 RCEC* rcec;
4416 WordSetID locksHeldW;
4417 UInt thrid : SCALARTS_N_THRBITS;
4418 UInt szLg2B : 2;
4419 UInt isW : 1;
4420 }
4421 Thr_n_RCEC;
sewardjf98e1c02008-10-25 16:22:41 +00004422
sewardj849b0ed2008-12-21 10:43:10 +00004423#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004424
4425typedef
philippecabdbb52015-04-20 21:33:16 +00004426 struct OldRef {
4427 struct OldRef *prev; // to refs older than this one
4428 struct OldRef *next; // to refs newer that this one
4429 Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
sewardjffce8152011-06-24 10:09:41 +00004430 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004431 Thr_n_RCEC accs[N_OLDREF_ACCS];
4432 }
4433 OldRef;
philippecabdbb52015-04-20 21:33:16 +00004434/* We need ga in OldRef in order to remove OldRef from the sparsewa
4435 by key (i.e. ga) when re-using the lru OldRef. */
sewardjd86e3a22008-12-03 11:39:37 +00004436
philippe6643e962012-01-17 21:16:30 +00004437//////////// BEGIN OldRef pool allocator
4438static PoolAlloc* oldref_pool_allocator;
philippecabdbb52015-04-20 21:33:16 +00004439// Note: We only allocate elements in this pool allocator, we never free them.
4440// We stop allocating elements at HG_(clo_conflict_cache_size).
philippe6643e962012-01-17 21:16:30 +00004441//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004442
philippecabdbb52015-04-20 21:33:16 +00004443static OldRef mru;
4444static OldRef lru;
4445// A doubly linked list, chaining all OldRefs in MRU/LRU order.
4446// mru/lru are sentinel nodes.
4447// Whenever an oldref is re-used, it is moved to the most-recently-used
4448// position (i.e. pointed to by mru.prev).
4449// When a new oldref is needed, it is allocated from the pool
4450// if we have not yet reached --conflict-cache-size.
4451// Otherwise, if all oldrefs have already been allocated,
4452// the least recently used (i.e. pointed to by lru.next) is re-used.
4453// When an OldRef is used, it is likewise moved to the most-recently-used
4454// position (i.e. pointed to by mru.prev).
4455
4456// Removes r from the double linked list
4457// Note: we do not need to test for special cases such as
4458// NULL next or prev pointers, because we have sentinel nodes
4459// at both ends of the list. So, a node is always forward and
4460// backward linked.
4461static inline void OldRef_unchain(OldRef *r)
4462{
4463 r->next->prev = r->prev;
4464 r->prev->next = r->next;
4465}
4466
4467// Insert new as the newest OldRef
4468// Similarly to OldRef_unchain, no need to test for NULL
4469// pointers, as e.g. mru.prev is always guaranteed to point
4470// to a non-NULL node (lru when the list is empty).
4471static inline void OldRef_newest(OldRef *new)
4472{
4473 new->next = &mru;
4474 new->prev = mru.prev;
4475 mru.prev = new;
4476 new->prev->next = new;
4477}
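
/* Illustrative usage, not part of libhb: thanks to the two sentinels,
   "touching" an OldRef is always the same two unconditional splices,
   with no NULL tests anywhere -- exactly what event_map_bind does
   below when it re-uses an existing entry: */
#if 0
static void example_touch_OldRef ( OldRef* r )
{
   OldRef_unchain(r); /* splice r out of wherever it currently is */
   OldRef_newest(r);  /* re-insert it just before the mru sentinel */
}
#endif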
sewardjd86e3a22008-12-03 11:39:37 +00004478
sewardjbc307e52008-12-06 22:10:54 +00004479static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
sewardjbc307e52008-12-06 22:10:54 +00004480static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
philippecabdbb52015-04-20 21:33:16 +00004481/* Note: the number of refs in the oldrefTree will always be equal to
4482 the number of elements that were allocated from the OldRef pool
4483 allocator, as we never free an OldRef: we just re-use them. */
4484
4485
4486/* Allocates a new OldRef, or re-uses the lru one if all allowed OldRefs
4487 have already been allocated. */
4488static OldRef* alloc_or_reuse_OldRef ( void )
4489{
4490 if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
4491 oldrefTreeN++;
4492 return VG_(allocEltPA) ( oldref_pool_allocator );
4493 } else {
4494 Bool b;
4495 UWord valW;
4496 OldRef *oldref = lru.next;
4497
4498 OldRef_unchain(oldref);
4499 b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
4500 tl_assert(b);
4501 tl_assert (oldref == (OldRef*)valW);
4502
4503 for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
4504 ThrID aThrID = oldref->accs[i].thrid;
4505 RCEC* aRef = oldref->accs[i].rcec;
4506 if (aRef) {
4507 tl_assert(aThrID != 0);
4508 stats__ctxt_rcdec3++;
4509 ctxt__rcdec( aRef );
4510 } else {
4511 tl_assert(aThrID == 0);
4512 }
4513 }
4514 return oldref;
4515 }
4516}
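
/* Example of the policy above, not part of libhb: with a conflict
   cache limit of N, the first N distinct addresses each get a fresh
   OldRef from the pool. The (N+1)th distinct address evicts lru.next:
   its recorded accesses are dropped (their RCEC reference counts
   decremented) and it is deleted from the sparsewa under its old ga;
   the caller then rebinds the recycled struct to the new address. */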
4517
sewardjf98e1c02008-10-25 16:22:41 +00004518
sewardj1669cc72008-12-13 01:20:21 +00004519inline static UInt min_UInt ( UInt a, UInt b ) {
4520 return a < b ? a : b;
4521}
4522
sewardja781be62008-12-08 00:12:28 +00004523/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4524 first interval is lower, 1 if the first interval is higher, and 0
4525 if there is any overlap. Redundant paranoia with casting is there
4526 following what looked distinctly like a bug in gcc-4.1.2, in which
4527 some of the comparisons were done signedly instead of
4528 unsignedly. */
4529/* Copied from exp-ptrcheck/sg_main.c */
4530static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4531 Addr a2, SizeT n2 ) {
4532 UWord a1w = (UWord)a1;
4533 UWord n1w = (UWord)n1;
4534 UWord a2w = (UWord)a2;
4535 UWord n2w = (UWord)n2;
4536 tl_assert(n1w > 0 && n2w > 0);
4537 if (a1w + n1w <= a2w) return -1L;
4538 if (a2w + n2w <= a1w) return 1L;
4539 return 0;
4540}
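
/* Examples, not part of libhb:
   cmp_nonempty_intervals(0x1000,4, 0x1004,4) == -1 (disjoint, below)
   cmp_nonempty_intervals(0x1004,4, 0x1000,4) == 1 (disjoint, above)
   cmp_nonempty_intervals(0x1000,4, 0x1002,4) == 0 (overlap) */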
4541
sewardjc5ea9962008-12-07 01:41:46 +00004542static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004543{
sewardjd86e3a22008-12-03 11:39:37 +00004544 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004545 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004546 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004547 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004548 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004549
sewardjffce8152011-06-24 10:09:41 +00004550 tl_assert(thr);
4551 ThrID thrid = thr->thrid;
4552 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4553
4554 WordSetID locksHeldW = thr->hgthread->locksetW;
4555
sewardjc5ea9962008-12-07 01:41:46 +00004556 rcec = get_RCEC( thr );
4557 ctxt__rcinc(rcec);
4558
sewardjffce8152011-06-24 10:09:41 +00004559 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004560 switch (szB) {
4561 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004562 case 1: szLg2B = 0; break;
4563 case 2: szLg2B = 1; break;
4564 case 4: szLg2B = 2; break;
4565 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004566 default: tl_assert(0);
4567 }
4568
sewardjffce8152011-06-24 10:09:41 +00004569 /* Look in the map to see if we already have a record for this
4570 address. */
philippe40648e22015-04-11 11:42:22 +00004571 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004572
sewardjd86e3a22008-12-03 11:39:37 +00004573 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004574
4575 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004576 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004577 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004578 ref = (OldRef*)valW;
philippecabdbb52015-04-20 21:33:16 +00004579
4580 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004581
sewardjf98e1c02008-10-25 16:22:41 +00004582 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004583 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004584 continue;
sewardjffce8152011-06-24 10:09:41 +00004585 if (ref->accs[i].szLg2B != szLg2B)
4586 continue;
4587 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004588 continue;
4589 /* else we have a match, so stop looking. */
4590 break;
sewardjf98e1c02008-10-25 16:22:41 +00004591 }
4592
4593 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004594 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004595 if (i > 0) {
4596 Thr_n_RCEC tmp = ref->accs[i-1];
4597 ref->accs[i-1] = ref->accs[i];
4598 ref->accs[i] = tmp;
4599 i--;
4600 }
sewardjc5ea9962008-12-07 01:41:46 +00004601 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004602 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004603 ctxt__rcdec( ref->accs[i].rcec );
4604 tl_assert(ref->accs[i].thrid == thrid);
4605 /* Update the RCEC and the W-held lockset. */
4606 ref->accs[i].rcec = rcec;
4607 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004608 } else {
sewardjffce8152011-06-24 10:09:41 +00004609 /* No entry for this (thread, R/W, size, nWHeld) quad.
4610 Shuffle all of them down one slot, and put the new entry
4611 at the start of the array. */
4612 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004613 /* the last slot is in use. We must dec the rc on the
4614 associated rcec. */
4615 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4616 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004617 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4618 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004619 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004620 } else {
4621 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4622 }
4623 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4624 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004625 ref->accs[0].thrid = thrid;
4626 ref->accs[0].szLg2B = szLg2B;
4627 ref->accs[0].isW = (UInt)(isW & 1);
4628 ref->accs[0].locksHeldW = locksHeldW;
4629 ref->accs[0].rcec = rcec;
4630 /* thrid==0 is used to signify an empty slot, so we can't
4631 add zero thrid (such a ThrID is invalid anyway). */
4632 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004633 }
4634
philippecabdbb52015-04-20 21:33:16 +00004635 OldRef_unchain(ref);
4636 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004637
4638 } else {
4639
4640 /* We don't have a record for this address. Create a new one. */
philippecabdbb52015-04-20 21:33:16 +00004641 ref = alloc_or_reuse_OldRef();
4642 ref->ga = a;
sewardjffce8152011-06-24 10:09:41 +00004643 ref->accs[0].thrid = thrid;
4644 ref->accs[0].szLg2B = szLg2B;
4645 ref->accs[0].isW = (UInt)(isW & 1);
4646 ref->accs[0].locksHeldW = locksHeldW;
4647 ref->accs[0].rcec = rcec;
4648
4649 /* thrid==0 is used to signify an empty slot, so we can't
4650 add zero thrid (such a ThrID is invalid anyway). */
4651 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4652
4653 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004654 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004655 ref->accs[j].rcec = NULL;
4656 ref->accs[j].thrid = 0;
4657 ref->accs[j].szLg2B = 0;
4658 ref->accs[j].isW = 0;
4659 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004660 }
sewardjbc307e52008-12-06 22:10:54 +00004661 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
philippecabdbb52015-04-20 21:33:16 +00004662 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004663 }
4664}
4665
4666
sewardjffce8152011-06-24 10:09:41 +00004667/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004668Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004669 /*OUT*/Thr** resThr,
4670 /*OUT*/SizeT* resSzB,
4671 /*OUT*/Bool* resIsW,
4672 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004673 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004674{
sewardja781be62008-12-08 00:12:28 +00004675 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004676 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004677 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004678 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004679
sewardjffce8152011-06-24 10:09:41 +00004680 ThrID cand_thrid;
4681 RCEC* cand_rcec;
4682 Bool cand_isW;
4683 SizeT cand_szB;
4684 WordSetID cand_locksHeldW;
4685 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004686
4687 Addr toCheck[15];
4688 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004689
4690 tl_assert(thr);
4691 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004692
sewardjffce8152011-06-24 10:09:41 +00004693 ThrID thrid = thr->thrid;
4694
sewardja781be62008-12-08 00:12:28 +00004695 toCheck[nToCheck++] = a;
4696 for (i = -7; i < (Word)szB; i++) {
4697 if (i != 0)
4698 toCheck[nToCheck++] = a + i;
4699 }
4700 tl_assert(nToCheck <= 15);
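   /* Why 15: a previously recorded access is at most 8 bytes long, so
      only accesses starting in [a-7 .. a+szB-1] can overlap [a, a+szB).
      With szB <= 8 that is at most 7 + 8 = 15 start addresses. */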
4701
4702 /* Now see if we can find a suitable matching event for
4703 any of the addresses in toCheck[0 .. nToCheck-1]. */
4704 for (j = 0; j < nToCheck; j++) {
4705
4706 cand_a = toCheck[j];
4707 // VG_(printf)("test %ld %p\n", j, cand_a);
4708
philippe40648e22015-04-11 11:42:22 +00004709 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004710 if (!b)
4711 continue;
4712
sewardjd86e3a22008-12-03 11:39:37 +00004713 ref = (OldRef*)valW;
sewardjffce8152011-06-24 10:09:41 +00004714 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004715
sewardjffce8152011-06-24 10:09:41 +00004716 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4717 cand_rcec = NULL;
4718 cand_isW = False;
4719 cand_szB = 0;
4720 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004721
sewardjc5ea9962008-12-07 01:41:46 +00004722 for (i = 0; i < N_OLDREF_ACCS; i++) {
4723 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004724 cand_rcec = cand->rcec;
4725 cand_thrid = cand->thrid;
4726 cand_isW = (Bool)cand->isW;
4727 cand_szB = 1 << cand->szLg2B;
4728 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004729
sewardjffce8152011-06-24 10:09:41 +00004730 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004731 /* This slot isn't in use. Ignore it. */
4732 continue;
4733
sewardjffce8152011-06-24 10:09:41 +00004734 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004735 /* This is an access by the same thread, but we're only
4736 interested in accesses from other threads. Ignore. */
4737 continue;
4738
4739 if ((!cand_isW) && (!isW))
4740 /* We don't want to report a read racing against another
4741 read; that's stupid. So in this case move on. */
4742 continue;
4743
sewardja781be62008-12-08 00:12:28 +00004744 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4745 /* No overlap with the access we're asking about. Ignore. */
4746 continue;
4747
sewardjc5ea9962008-12-07 01:41:46 +00004748 /* We have a match. Stop searching. */
4749 break;
4750 }
4751
4752 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4753
sewardja781be62008-12-08 00:12:28 +00004754 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004755 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004756 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004757 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004758 tl_assert(cand_rcec);
4759 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4760 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004761 /* Count how many non-zero frames we have. */
4762 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4763 for (n = 0; n < maxNFrames; n++) {
4764 if (0 == cand_rcec->frames[n]) break;
4765 }
sewardjffce8152011-06-24 10:09:41 +00004766 *resEC = VG_(make_ExeContext_from_StackTrace)
4767 (cand_rcec->frames, n);
4768 *resThr = Thr__from_ThrID(cand_thrid);
4769 *resSzB = cand_szB;
4770 *resIsW = cand_isW;
4771 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004772 return True;
4773 }
sewardjc5ea9962008-12-07 01:41:46 +00004774
sewardja781be62008-12-08 00:12:28 +00004775 /* consider next address in toCheck[] */
4776 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004777
sewardja781be62008-12-08 00:12:28 +00004778 /* really didn't find anything. */
4779 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004780}
4781
4782static void event_map_init ( void )
4783{
4784 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004785
philippe6643e962012-01-17 21:16:30 +00004786 /* Context (RCEC) pool allocator */
4787 rcec_pool_allocator = VG_(newPA) (
4788 sizeof(RCEC),
4789 1000 /* RCECs per pool */,
4790 HG_(zalloc),
4791 "libhb.event_map_init.1 (RCEC pools)",
4792 HG_(free)
4793 );
sewardjd86e3a22008-12-03 11:39:37 +00004794
4795 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004796 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004797 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004798 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004799 for (i = 0; i < N_RCEC_TAB; i++)
4800 contextTab[i] = NULL;
4801
philippe6643e962012-01-17 21:16:30 +00004802 /* Oldref pool allocator */
4803 oldref_pool_allocator = VG_(newPA)(
4804 sizeof(OldRef),
4805 1000 /* OldRefs per pool */,
4806 HG_(zalloc),
4807 "libhb.event_map_init.3 (OldRef pools)",
4808 HG_(free)
4809 );
sewardjd86e3a22008-12-03 11:39:37 +00004810
sewardjd86e3a22008-12-03 11:39:37 +00004811 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004812 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004813 oldrefTree = VG_(newSWA)(
4814 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004815 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004816 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004817 );
sewardjf98e1c02008-10-25 16:22:41 +00004818
sewardjf98e1c02008-10-25 16:22:41 +00004819 oldrefTreeN = 0;
philippecabdbb52015-04-20 21:33:16 +00004820 mru.prev = &lru;
4821 mru.next = NULL;
4822 lru.prev = NULL;
4823 lru.next = &mru;
4824 for (i = 0; i < N_OLDREF_ACCS; i++) {
4825 mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
4826 .locksHeldW = 0,
4827 .thrid = 0,
4828 .szLg2B = 0,
4829 .isW = 0};
4830 lru.accs[i] = mru.accs[i];
4831 }
sewardjf98e1c02008-10-25 16:22:41 +00004832}
4833
philippecabdbb52015-04-20 21:33:16 +00004834static void event_map__check_reference_counts ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004835{
4836 RCEC* rcec;
4837 OldRef* oldref;
4838 Word i;
4839 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004840 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004841
4842 /* Set the 'check' reference counts to zero. Also, optionally
4843 check that the real reference counts are non-zero. We allow
4844 these to fall to zero before a GC, but the GC must get rid of
4845 all those that are zero, hence none should be zero after a
4846 GC. */
4847 for (i = 0; i < N_RCEC_TAB; i++) {
4848 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4849 nEnts++;
4850 tl_assert(rcec);
4851 tl_assert(rcec->magic == RCEC_MAGIC);
sewardjf98e1c02008-10-25 16:22:41 +00004852 rcec->rcX = 0;
4853 }
4854 }
4855
4856 /* check that the stats are sane */
4857 tl_assert(nEnts == stats__ctxt_tab_curr);
4858 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4859
4860 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004861 VG_(initIterSWA)( oldrefTree );
4862 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004863 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004864 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004865 ThrID aThrID = oldref->accs[i].thrid;
4866 RCEC* aRef = oldref->accs[i].rcec;
4867 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004868 tl_assert(aRef);
4869 tl_assert(aRef->magic == RCEC_MAGIC);
4870 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004871 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004872 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004873 }
4874 }
4875 }
4876
4877 /* compare check ref counts with actual */
4878 for (i = 0; i < N_RCEC_TAB; i++) {
4879 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4880 tl_assert(rcec->rc == rcec->rcX);
4881 }
4882 }
4883}
4884
sewardj8fd92d32008-11-20 23:17:01 +00004885__attribute__((noinline))
philippecabdbb52015-04-20 21:33:16 +00004886static void do_RCEC_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004887{
philippecabdbb52015-04-20 21:33:16 +00004888 UInt i;
sewardjf98e1c02008-10-25 16:22:41 +00004889
philippecabdbb52015-04-20 21:33:16 +00004890 if (VG_(clo_stats)) {
4891 static UInt ctr = 1;
4892 VG_(message)(Vg_DebugMsg,
4893 "libhb: RCEC GC: #%u %lu slots,"
4894 " %lu cur ents(ref'd %lu),"
4895 " %lu max ents\n",
4896 ctr++,
4897 (UWord)N_RCEC_TAB,
4898 stats__ctxt_tab_curr, RCEC_referenced,
4899 stats__ctxt_tab_max );
sewardjf98e1c02008-10-25 16:22:41 +00004900 }
philippecabdbb52015-04-20 21:33:16 +00004901 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004902
4903 /* Throw away all RCECs with zero reference counts */
4904 for (i = 0; i < N_RCEC_TAB; i++) {
4905 RCEC** pp = &contextTab[i];
4906 RCEC* p = *pp;
4907 while (p) {
4908 if (p->rc == 0) {
4909 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004910 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004911 p = *pp;
4912 tl_assert(stats__ctxt_tab_curr > 0);
philippe06bc23a2015-04-17 21:19:43 +00004913 stats__ctxt_rcdec_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004914 stats__ctxt_tab_curr--;
4915 } else {
4916 pp = &p->next;
4917 p = p->next;
4918 }
4919 }
4920 }
4921
philippecabdbb52015-04-20 21:33:16 +00004922 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004923}
4924
sewardjf98e1c02008-10-25 16:22:41 +00004925/////////////////////////////////////////////////////////
4926// //
4927// Core MSM //
4928// //
4929/////////////////////////////////////////////////////////
4930
sewardj23f12002009-07-24 08:45:08 +00004931/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4932 Nov 08, and again after [...],
4933 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004934
sewardj23f12002009-07-24 08:45:08 +00004935static ULong stats__msmcread = 0;
4936static ULong stats__msmcread_change = 0;
4937static ULong stats__msmcwrite = 0;
4938static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004939
sewardj8ab2c132009-08-02 09:34:35 +00004940/* Some notes on the H1 history mechanism:
4941
4942 Transition rules are:
4943
4944 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4945 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4946
4947 After any access by a thread T to a location L, L's constraint pair
4948 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4949
4950 After a race by thread T conflicting with some previous access by
4951 some other thread U, for a location with constraint (before
4952 processing the later access) (Cr,Cw), Cw[U] is the segment in
4953 which the previous access lies.
4954
4955 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4956 are compared so as to find out which thread(s) this access
4957 conflicts with. Once that is established, we also require the
4958 pre-update Cw for the location, so we can index into it for those
4959 threads, to get the scalar clock values for the point at which the
4960 former accesses were made. (In fact we only bother to do any of
4961 this for an arbitrarily chosen one of the conflicting threads, as
4962 that's simpler, it avoids flooding the user with vast amounts of
4963 mostly useless information, and because the program is wrong if it
4964 contains any races at all -- so we don't really need to show all
4965 conflicting access pairs initially, so long as we show at least
4966 one when any exist).
4967
4968 ---
4969
4970 That requires the auxiliary proof that
4971
4972 (Cr `join` Kw)[T] == Kw[T]
4973
4974 Why should that be true? Because for any thread T, Kw[T] >= the
4975 scalar clock value for T known by any other thread. In other
4976 words, because T's value for its own scalar clock is at least as up
4977 to date as the value for it known by any other thread (that is true
4978 for both the R- and W- scalar clocks). Hence no other thread will
4979 be able to feed in a value for that element (indirectly via a
4980 constraint) which will exceed Kw[T], and hence the join cannot
4981 cause that particular element to advance.
4982*/
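
/* Concrete instance of the auxiliary proof, not part of libhb: write
   scalar clocks as [T0,T1] and let T1 be the accessing thread, with
   Kw = [3,7]. A constraint Cr can only hold values of T1's clock
   that some thread once observed, and no thread knows T1's clock
   better than T1 itself; so e.g. Cr = [5,4] is possible but
   Cr = [5,9] is not. Then (Cr `join` Kw) = [5,7], whose T1 element
   equals Kw[T1] = 7: the join never advances the accessing thread's
   own element. */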
4983
sewardjf98e1c02008-10-25 16:22:41 +00004984__attribute__((noinline))
4985static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004986 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004987 VtsID Cfailed,
4988 VtsID Kfailed,
4989 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004990{
sewardjc5ea9962008-12-07 01:41:46 +00004991 /* Call here to report a race. We just hand it onwards to
4992 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004993 error is going to be collected, then, at history_level 2, that
4994 queries the conflicting-event map. The alternative would be to
4995 query it right here. But that causes a lot of pointless queries
4996 for errors which will shortly be discarded as duplicates, and
4997 can become a performance overhead; so we defer the query until
4998 we know the error is not a duplicate. */
4999
5000 /* Stacks for the bounds of the (or one of the) conflicting
5001 segment(s). These are only set at history_level 1. */
5002 ExeContext* hist1_seg_start = NULL;
5003 ExeContext* hist1_seg_end = NULL;
5004 Thread* hist1_conf_thr = NULL;
5005
5006 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00005007 tl_assert(acc_thr->hgthread);
5008 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00005009 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
5010
5011 if (HG_(clo_history_level) == 1) {
5012 Bool found;
5013 Word firstIx, lastIx;
5014 ULong_n_EC key;
5015
5016 /* At history_level 1, we must round up the relevant stack-pair
5017 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00005018 deferring it is complex; we can't (easily) put Kfailed and
5019 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00005020 getting tied up in difficulties with VtsID reference
5021 counting. So just do it now. */
5022 Thr* confThr;
5023 ULong confTym = 0;
5024 /* Which thread are we in conflict with? There may be more than
5025 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
5026 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00005027 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00005028 /* This must exist! since if it was NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00005029 conflict (semantics of return value of
5030 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
5031 called us, just checked exactly this -- that there was in
5032 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00005033 tl_assert(confThr);
5034
5035 /* Get the scalar clock value that the conflicting thread
5036 introduced into the constraint. A careful examination of the
5037 base machine rules shows that this must be the same as the
5038 conflicting thread's scalar clock when it created this
5039 constraint. Hence we know the scalar clock of the
5040 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00005041 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00005042
5043 /* Using this scalar clock, index into the conflicting thread's
5044 collection of stack traces made each time its vector clock
5045 (hence its scalar clock) changed. This gives the stack
5046 traces at the start and end of the conflicting segment (well,
5047 as per comment just above, of one of the conflicting
5048 segments, if there are more than one). */
5049 key.ull = confTym;
5050 key.ec = NULL;
5051 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00005052 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00005053 firstIx = lastIx = 0;
5054 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00005055 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005056 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00005057 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00005058 );
sewardj8ab2c132009-08-02 09:34:35 +00005059 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00005060 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00005061 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00005062 confThr, confTym, found, firstIx, lastIx);
5063 /* We can't indefinitely collect stack traces at VTS
5064 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00005065 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00005066 ones, which in turn means we might fail to find index value
5067 confTym in the array. */
5068 if (found) {
5069 ULong_n_EC *pair_start, *pair_end;
5070 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00005071 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00005072 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005073 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00005074 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00005075 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005076 lastIx+1 );
5077 /* from properties of VG_(lookupXA) and the comparison fn used: */
5078 tl_assert(pair_start->ull < pair_end->ull);
5079 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005080 /* Could do a bit better here. It may be that pair_end
5081 doesn't have a stack, but the following entries in the
5082 array have the same scalar Kw and do have a stack. So
5083 we should search a bit further along the array than
5084 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00005085 } else {
sewardjffce8152011-06-24 10:09:41 +00005086 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00005087 hist1_seg_end = main_get_EC( confThr );
5088 }
5089 // seg_start could be NULL iff this is the first stack in the thread
5090 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5091 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00005092 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00005093 }
5094 }
5095
sewardj60626642011-03-10 15:14:37 +00005096 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00005097 szB, isWrite,
5098 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00005099}
5100
5101static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00005102 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00005103 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00005104 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5105 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00005106}
5107
5108
5109/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00005110static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005111 /* The following are only needed for
5112 creating error reports. */
5113 Thr* acc_thr,
5114 Addr acc_addr, SizeT szB )
5115{
5116 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005117 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00005118
5119 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005120 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005121 tl_assert(is_sane_SVal_C(svOld));
5122 }
5123
sewardj1c0ce7a2009-07-01 08:10:49 +00005124 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005125 VtsID tviR = acc_thr->viR;
5126 VtsID tviW = acc_thr->viW;
5127 VtsID rmini = SVal__unC_Rmin(svOld);
5128 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005129 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5130 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005131 /* no race */
5132 /* Note: RWLOCK subtlety: use tviW, not tviR */
5133 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5134 goto out;
5135 } else {
sewardjb0e009d2008-11-19 16:35:15 +00005136 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005137 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5138 tl_assert(leqxx);
5139 // same as in non-race case
5140 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5141 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005142 rmini, /* Cfailed */
5143 tviR, /* Kfailed */
5144 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005145 goto out;
5146 }
5147 }
5148 if (SVal__isA(svOld)) {
5149 /* reading no-access memory (sigh); leave unchanged */
5150 /* check for no pollution */
5151 tl_assert(svOld == SVal_NOACCESS);
5152 svNew = SVal_NOACCESS;
5153 goto out;
5154 }
sewardj23f12002009-07-24 08:45:08 +00005155 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005156 tl_assert(0);
5157
5158 out:
sewardj8f5374e2008-12-07 11:40:17 +00005159 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005160 tl_assert(is_sane_SVal_C(svNew));
5161 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005162 if (UNLIKELY(svNew != svOld)) {
5163 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005164 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005165 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005166 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005167 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005168 }
5169 }
5170 return svNew;
5171}
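
/* Worked example of the read rule, not part of libhb, with clocks
   written as [T0,T1]. Let the constraint be (Cr,Cw) = ([2,1],[2,3])
   and let T0 read with (Kr,Kw) = ([4,1],[3,1]). The race check
   Cr <= Kr holds ([2,1] <= [4,1]), so there is no race, and the new
   constraint is (Cr, Cw `join` Kw) = ([2,1],[3,3]). Had Cr been
   [2,2], the check would fail in T1's element (2 > 1) and a race
   against T1 would be reported. */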
5172
5173
5174/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005175static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005176 /* The following are only needed for
5177 creating error reports. */
5178 Thr* acc_thr,
5179 Addr acc_addr, SizeT szB )
5180{
5181 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005182 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005183
5184 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005185 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005186 tl_assert(is_sane_SVal_C(svOld));
5187 }
5188
sewardj1c0ce7a2009-07-01 08:10:49 +00005189 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005190 VtsID tviW = acc_thr->viW;
5191 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005192 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5193 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005194 /* no race */
5195 svNew = SVal__mkC( tviW, tviW );
5196 goto out;
5197 } else {
5198 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005199 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005200 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5201 tl_assert(leqxx);
5202 // same as in non-race case
5203 // proof: in the non-race case, we have
5204 // rmini <= wmini (invar on constraints)
5205 // tviW <= tviR (invar on thread clocks)
5206 // wmini <= tviW (from run-time check)
5207 // hence from transitivity of <= we have
5208 // rmini <= wmini <= tviW
5209 // and so join(rmini,tviW) == tviW
5210 // and join(wmini,tviW) == tviW
5211 // qed.
5212 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5213 VtsID__join2(wmini, tviW) );
5214 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005215 wmini, /* Cfailed */
5216 tviW, /* Kfailed */
5217 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005218 goto out;
5219 }
5220 }
5221 if (SVal__isA(svOld)) {
5222 /* writing no-access memory (sigh); leave unchanged */
5223 /* check for no pollution */
5224 tl_assert(svOld == SVal_NOACCESS);
5225 svNew = SVal_NOACCESS;
5226 goto out;
5227 }
sewardj23f12002009-07-24 08:45:08 +00005228 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005229 tl_assert(0);
5230
5231 out:
sewardj8f5374e2008-12-07 11:40:17 +00005232 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005233 tl_assert(is_sane_SVal_C(svNew));
5234 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005235 if (UNLIKELY(svNew != svOld)) {
5236 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005237 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005238 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005239 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005240 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005241 }
5242 }
5243 return svNew;
5244}
5245
5246
5247/////////////////////////////////////////////////////////
5248// //
5249// Apply core MSM to specific memory locations //
5250// //
5251/////////////////////////////////////////////////////////
5252
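/* Common shape of the eight zsm_sapplyNN__msmc{read,write} routines
   below: locate the CacheLine, make the addressed leaf valid at the
   access width (pulling the descriptor tree down, or splitting into
   two half-width accesses where that is impossible), run
   msmcread/msmcwrite on the shadow value, and store the result back.
   Only the width-specific descriptor checks differ. */
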
sewardj23f12002009-07-24 08:45:08 +00005253/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005254
sewardj23f12002009-07-24 08:45:08 +00005255static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005256 CacheLine* cl;
5257 UWord cloff, tno, toff;
5258 SVal svOld, svNew;
5259 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005260 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005261 cl = get_cacheline(a);
5262 cloff = get_cacheline_offset(a);
5263 tno = get_treeno(a);
5264 toff = get_tree_offset(a); /* == 0 .. 7 */
5265 descr = cl->descrs[tno];
5266 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5267 SVal* tree = &cl->svals[tno << 3];
5268 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005269 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005270 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5271 }
5272 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005273 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005274 if (CHECK_ZSM)
5275 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005276 cl->svals[cloff] = svNew;
5277}
5278
sewardj23f12002009-07-24 08:45:08 +00005279static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005280 CacheLine* cl;
5281 UWord cloff, tno, toff;
5282 SVal svOld, svNew;
5283 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005284 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005285 cl = get_cacheline(a);
5286 cloff = get_cacheline_offset(a);
5287 tno = get_treeno(a);
5288 toff = get_tree_offset(a); /* == 0 .. 7 */
5289 descr = cl->descrs[tno];
5290 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5291 SVal* tree = &cl->svals[tno << 3];
5292 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005293 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005294 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5295 }
5296 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005297 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005298 if (CHECK_ZSM)
5299 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005300 cl->svals[cloff] = svNew;
5301}
5302
sewardj23f12002009-07-24 08:45:08 +00005303/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005304
sewardj23f12002009-07-24 08:45:08 +00005305static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005306 CacheLine* cl;
5307 UWord cloff, tno, toff;
5308 SVal svOld, svNew;
5309 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005310 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005311 if (UNLIKELY(!aligned16(a))) goto slowcase;
5312 cl = get_cacheline(a);
5313 cloff = get_cacheline_offset(a);
5314 tno = get_treeno(a);
5315 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5316 descr = cl->descrs[tno];
5317 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5318 if (valid_value_is_below_me_16(descr, toff)) {
5319 goto slowcase;
5320 } else {
5321 SVal* tree = &cl->svals[tno << 3];
5322 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5323 }
sewardj8f5374e2008-12-07 11:40:17 +00005324 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005325 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5326 }
5327 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005328 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005329 if (CHECK_ZSM)
5330 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005331 cl->svals[cloff] = svNew;
5332 return;
5333 slowcase: /* misaligned, or must go further down the tree */
5334 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005335 zsm_sapply08__msmcread( thr, a + 0 );
5336 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005337}
5338
sewardj23f12002009-07-24 08:45:08 +00005339static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005340 CacheLine* cl;
5341 UWord cloff, tno, toff;
5342 SVal svOld, svNew;
5343 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005344 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005345 if (UNLIKELY(!aligned16(a))) goto slowcase;
5346 cl = get_cacheline(a);
5347 cloff = get_cacheline_offset(a);
5348 tno = get_treeno(a);
5349 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5350 descr = cl->descrs[tno];
5351 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5352 if (valid_value_is_below_me_16(descr, toff)) {
5353 goto slowcase;
5354 } else {
5355 SVal* tree = &cl->svals[tno << 3];
5356 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5357 }
sewardj8f5374e2008-12-07 11:40:17 +00005358 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005359 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5360 }
5361 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005362 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005363 if (CHECK_ZSM)
5364 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005365 cl->svals[cloff] = svNew;
5366 return;
5367 slowcase: /* misaligned, or must go further down the tree */
5368 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005369 zsm_sapply08__msmcwrite( thr, a + 0 );
5370 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005371}
5372
sewardj23f12002009-07-24 08:45:08 +00005373/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005374
sewardj23f12002009-07-24 08:45:08 +00005375static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005376 CacheLine* cl;
5377 UWord cloff, tno, toff;
5378 SVal svOld, svNew;
5379 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005380 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005381 if (UNLIKELY(!aligned32(a))) goto slowcase;
5382 cl = get_cacheline(a);
5383 cloff = get_cacheline_offset(a);
5384 tno = get_treeno(a);
5385 toff = get_tree_offset(a); /* == 0 or 4 */
5386 descr = cl->descrs[tno];
5387 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5388 if (valid_value_is_above_me_32(descr, toff)) {
5389 SVal* tree = &cl->svals[tno << 3];
5390 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5391 } else {
5392 goto slowcase;
5393 }
sewardj8f5374e2008-12-07 11:40:17 +00005394 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005395 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5396 }
5397 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005398 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005399 if (CHECK_ZSM)
5400 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005401 cl->svals[cloff] = svNew;
5402 return;
5403 slowcase: /* misaligned, or must go further down the tree */
5404 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005405 zsm_sapply16__msmcread( thr, a + 0 );
5406 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005407}
5408
sewardj23f12002009-07-24 08:45:08 +00005409static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005410 CacheLine* cl;
5411 UWord cloff, tno, toff;
5412 SVal svOld, svNew;
5413 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005414 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005415 if (UNLIKELY(!aligned32(a))) goto slowcase;
5416 cl = get_cacheline(a);
5417 cloff = get_cacheline_offset(a);
5418 tno = get_treeno(a);
5419 toff = get_tree_offset(a); /* == 0 or 4 */
5420 descr = cl->descrs[tno];
5421 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5422 if (valid_value_is_above_me_32(descr, toff)) {
5423 SVal* tree = &cl->svals[tno << 3];
5424 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5425 } else {
5426 goto slowcase;
5427 }
sewardj8f5374e2008-12-07 11:40:17 +00005428 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005429 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5430 }
5431 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005432 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005433 if (CHECK_ZSM)
5434 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005435 cl->svals[cloff] = svNew;
5436 return;
5437 slowcase: /* misaligned, or must go further down the tree */
5438 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005439 zsm_sapply16__msmcwrite( thr, a + 0 );
5440 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005441}
5442
sewardj23f12002009-07-24 08:45:08 +00005443/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005444
sewardj23f12002009-07-24 08:45:08 +00005445static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005446 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005447 UWord cloff, tno;
5448 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005449 SVal svOld, svNew;
5450 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005451 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005452 if (UNLIKELY(!aligned64(a))) goto slowcase;
5453 cl = get_cacheline(a);
5454 cloff = get_cacheline_offset(a);
5455 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005456 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005457 descr = cl->descrs[tno];
5458 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5459 goto slowcase;
5460 }
5461 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005462 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005463 if (CHECK_ZSM)
5464 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005465 cl->svals[cloff] = svNew;
5466 return;
5467 slowcase: /* misaligned, or must go further down the tree */
5468 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005469 zsm_sapply32__msmcread( thr, a + 0 );
5470 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005471}
5472
sewardj23f12002009-07-24 08:45:08 +00005473static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005474 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005475 UWord cloff, tno;
5476 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005477 SVal svOld, svNew;
5478 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005479 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005480 if (UNLIKELY(!aligned64(a))) goto slowcase;
5481 cl = get_cacheline(a);
5482 cloff = get_cacheline_offset(a);
5483 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005484 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005485 descr = cl->descrs[tno];
5486 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5487 goto slowcase;
5488 }
5489 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005490 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005491 if (CHECK_ZSM)
5492 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005493 cl->svals[cloff] = svNew;
5494 return;
5495 slowcase: /* misaligned, or must go further down the tree */
5496 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005497 zsm_sapply32__msmcwrite( thr, a + 0 );
5498 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005499}
5500
sewardj23f12002009-07-24 08:45:08 +00005501/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005502
5503static
sewardj23f12002009-07-24 08:45:08 +00005504void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005505 CacheLine* cl;
5506 UWord cloff, tno, toff;
5507 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005508 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005509 cl = get_cacheline(a);
5510 cloff = get_cacheline_offset(a);
5511 tno = get_treeno(a);
5512 toff = get_tree_offset(a); /* == 0 .. 7 */
5513 descr = cl->descrs[tno];
5514 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5515 SVal* tree = &cl->svals[tno << 3];
5516 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005517 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005518 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5519 }
5520 tl_assert(svNew != SVal_INVALID);
5521 cl->svals[cloff] = svNew;
5522}
5523
sewardj23f12002009-07-24 08:45:08 +00005524/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005525
5526static
sewardj23f12002009-07-24 08:45:08 +00005527void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005528 CacheLine* cl;
5529 UWord cloff, tno, toff;
5530 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005531 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005532 if (UNLIKELY(!aligned16(a))) goto slowcase;
5533 cl = get_cacheline(a);
5534 cloff = get_cacheline_offset(a);
5535 tno = get_treeno(a);
5536 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5537 descr = cl->descrs[tno];
5538 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5539 if (valid_value_is_below_me_16(descr, toff)) {
5540 /* Writing at this level. Need to fix up 'descr'. */
5541 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5542 /* At this point, the tree does not match cl->descr[tno] any
5543 more. The assignments below will fix it up. */
5544 } else {
5545 /* We can't indiscriminately write on the w16 node as in the
5546 w64 case, as that might make the node inconsistent with
5547 its parent. So first, pull down to this level. */
5548 SVal* tree = &cl->svals[tno << 3];
5549 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005550 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005551 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5552 }
5553 }
5554 tl_assert(svNew != SVal_INVALID);
5555 cl->svals[cloff + 0] = svNew;
5556 cl->svals[cloff + 1] = SVal_INVALID;
5557 return;
5558 slowcase: /* misaligned */
5559 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005560 zsm_swrite08( a + 0, svNew );
5561 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005562}
5563
sewardj23f12002009-07-24 08:45:08 +00005564/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005565
5566static
sewardj23f12002009-07-24 08:45:08 +00005567void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005568 CacheLine* cl;
5569 UWord cloff, tno, toff;
5570 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005571 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005572 if (UNLIKELY(!aligned32(a))) goto slowcase;
5573 cl = get_cacheline(a);
5574 cloff = get_cacheline_offset(a);
5575 tno = get_treeno(a);
5576 toff = get_tree_offset(a); /* == 0 or 4 */
5577 descr = cl->descrs[tno];
5578 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5579 if (valid_value_is_above_me_32(descr, toff)) {
5580 /* We can't indiscriminately write on the w32 node as in the
5581 w64 case, as that might make the node inconsistent with
5582 its parent. So first, pull down to this level. */
5583 SVal* tree = &cl->svals[tno << 3];
5584 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005585 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005586 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5587 } else {
5588 /* Writing at this level. Need to fix up 'descr'. */
5589 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5590 /* At this point, the tree does not match cl->descr[tno] any
5591 more. The assignments below will fix it up. */
5592 }
5593 }
5594 tl_assert(svNew != SVal_INVALID);
5595 cl->svals[cloff + 0] = svNew;
5596 cl->svals[cloff + 1] = SVal_INVALID;
5597 cl->svals[cloff + 2] = SVal_INVALID;
5598 cl->svals[cloff + 3] = SVal_INVALID;
5599 return;
5600 slowcase: /* misaligned */
5601 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005602 zsm_swrite16( a + 0, svNew );
5603 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005604}
5605
sewardj23f12002009-07-24 08:45:08 +00005606/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005607
5608static
sewardj23f12002009-07-24 08:45:08 +00005609void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005610 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005611 UWord cloff, tno;
5612 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005613 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005614 if (UNLIKELY(!aligned64(a))) goto slowcase;
5615 cl = get_cacheline(a);
5616 cloff = get_cacheline_offset(a);
5617 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005618 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005619 cl->descrs[tno] = TREE_DESCR_64;
5620 tl_assert(svNew != SVal_INVALID);
5621 cl->svals[cloff + 0] = svNew;
5622 cl->svals[cloff + 1] = SVal_INVALID;
5623 cl->svals[cloff + 2] = SVal_INVALID;
5624 cl->svals[cloff + 3] = SVal_INVALID;
5625 cl->svals[cloff + 4] = SVal_INVALID;
5626 cl->svals[cloff + 5] = SVal_INVALID;
5627 cl->svals[cloff + 6] = SVal_INVALID;
5628 cl->svals[cloff + 7] = SVal_INVALID;
5629 return;
5630 slowcase: /* misaligned */
5631 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005632 zsm_swrite32( a + 0, svNew );
5633 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005634}
5635
sewardj23f12002009-07-24 08:45:08 +00005636/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005637
5638static
sewardj23f12002009-07-24 08:45:08 +00005639SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005640 CacheLine* cl;
5641 UWord cloff, tno, toff;
5642 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005643 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005644 cl = get_cacheline(a);
5645 cloff = get_cacheline_offset(a);
5646 tno = get_treeno(a);
5647 toff = get_tree_offset(a); /* == 0 .. 7 */
5648 descr = cl->descrs[tno];
5649 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5650 SVal* tree = &cl->svals[tno << 3];
5651 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5652 }
5653 return cl->svals[cloff];
5654}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
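
/* A minimal sketch (an assumption about a caller, not library code)
   of how a realloc implementation might drive zsm_scopy_range: copy
   the shadow state of the surviving prefix to the new block, then
   clear the filter for the destination, as the comment above
   requires.  The helper name and its caller are hypothetical. */
#if 0
static void example_realloc_shadow ( Thr* thr,
                                     Addr old_block, Addr new_block,
                                     SizeT old_szB, SizeT new_szB )
{
   SizeT keep = old_szB < new_szB ? old_szB : new_szB;
   zsm_scopy_range( old_block, new_block, keep );
   /* zsm_scopy_range does not touch the filtering arrangements */
   Filter__clear_range( thr->filter, new_block, keep );
}
#endif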


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
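
/* Worked example (illustrative): for a == 0x1003, len == 13, the
   ladder above issues an 8-bit swrite at 0x1003 (making 'a' 32-bit
   aligned, so the 16-bit step is skipped), a 32-bit swrite at 0x1004,
   and one 64-bit swrite at 0x1008, consuming exactly 1 + 4 + 8 == 13
   bytes.  Each alignment step runs at most once, so even the worst
   case costs only a few sub-word writes around the 64-bit core. */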


/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}
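
/* Worked example (illustrative, assuming N_LINE_ARANGE == 64): for
   a == 0x1010, len == 0x200, the range splits into a 48-byte partial
   head [0x1010, 0x1040), seven whole lines [0x1040, 0x1200), and a
   16-byte partial tail [0x1200, 0x1210).  Head and tail go through
   zsm_sset_range_SMALL, ie through the cache; each whole line is
   rewritten in-cache only if it happens to be resident, and otherwise
   its LineZ is overwritten in place with the one-entry dictionary
   { svNew }, generating no cache traffic at all. */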


/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif
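
/* The functions below are thin front ends to the corresponding
   zsm_sapply* routines.  Each first consults the per-thread filter, a
   small cache of recently processed accesses; if the same access has
   already been handled since the filter was last cleared (which
   happens whenever the thread's vector clocks change or it is
   rescheduled), the full state-machine application can be skipped.
   For example (illustrative), a loop repeatedly writing one aligned
   64-bit slot pays for zsm_sapply64__msmcwrite only on its first
   iteration; later iterations hit Filter__ok_to_skip_cwr64 and return
   immediately, which is why stats__f_sk typically dwarfs the number
   of accesses that reach the state machine. */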

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//   Synchronisation objects                           //
//                                                     //
/////////////////////////////////////////////////////////

/* A double linked list of all the SO's. */
SO* admin_SO = NULL;

static SO* SO__Alloc ( void )
{
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   /* Add to double linked list */
   if (admin_SO) {
      tl_assert(admin_SO->admin_prev == NULL);
      admin_SO->admin_prev = so;
      so->admin_next = admin_SO;
   } else {
      so->admin_next = NULL;
   }
   so->admin_prev = NULL;
   admin_SO = so;
   /* */
   return so;
}

static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   /* Del from double linked list */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (so == admin_SO)
      admin_SO = so->admin_next;
   /* */
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//   Top Level API                                     //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to store a large number of these,
   // so make sure they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);

   /* because the first 1024 are unusable */
   tl_assert(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 3 bits to spare (see defn of
      Thr_n_RCEC). */
   tl_assert(SCALARTS_N_THRBITS <= 29);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   if (sizeof(UWord) == 8) {
      tl_assert(sizeof(Thr_n_RCEC) == 16);
   } else {
      tl_assert(sizeof(Thr_n_RCEC) == 12);
   }

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   tl_assert(sizeof(WordSetID) == 4);
   tl_assert(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
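
/* Worked example (illustrative): suppose parent P runs with viR ==
   viW == {P:4}.  libhb_create gives child C the clock {P:4, C:1} (the
   implicit zero entry for C, ticked once) and moves P along to {P:5}.
   Hence everything P did before the create happens-before everything
   C does, while P's subsequent actions (from {P:5} onwards) stay
   unordered with respect to C until some later send/recv pair orders
   them. */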

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
                  " #%lu scanGC \n",
                  stats__secmaps_in_map_shmem,
                  shmem__SecMap_do_GC(False /* really do GC */),
                  stats__secmaps_scanGC);
      tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
      VG_(printf)(" secmaps: %'10lu in freelist,"
                  " total (scanGCed %'lu, ssetGCed %'lu)\n",
                  SecMap_freelist_length(),
                  stats__secmaps_scanGCed,
                  stats__secmaps_ssetGCed);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)("   cache: %'14lu flushes_invals\n",
                  stats__cache_flushes_invals );
      VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)("   cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu"
                  "    2to1 %'12lu\n",
                  stats__cline_64to32splits, stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu"
                  "    2to1 %'12lu\n",
                  stats__cline_64to32pulldown, stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)("   cline: sizeof(CacheLineZ) %ld,"
                     " covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ),
                     (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)("   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
                  stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)("   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                  stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
      VG_(printf)("   libhb: VTSset: find__or__clone_and_add %'lu"
                  " (%'lu allocd)\n",
                  stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)("   libhb: #%lu vts_tab GC    #%lu vts pruning\n",
                  stats__vts_tab_GC, stats__vts_pruning);
      VG_(printf)( "   libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)("   libhb: threads live: %d exit_and_joinedwith %d"
                     " exit %d joinedwith %d\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)("   libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      {
         UWord OldRef_accs_n[N_OLDREF_ACCS+1];
         UInt  accs_n;
         UWord OldRef_n;
         UInt  i;

         OldRef_n = 0;
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            OldRef_accs_n[i] = 0;

         for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
            OldRef_n++;
            accs_n = 0;
            for (i = 0; i < N_OLDREF_ACCS; i++) {
               if (o->accs[i].thrid != 0)
                  accs_n++;
            }
            OldRef_accs_n[accs_n]++;
         }

         tl_assert(OldRef_n == oldrefTreeN);
         VG_(printf)( "   libhb: oldrefTreeN %lu ", oldrefTreeN);
         VG_(printf)( "( ");
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
         VG_(printf)( ")\n");
      }
      VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( "   libhb: contextTab: %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
      {
#        define MAXCHAIN 10
         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
         UInt non0chain = 0;
         UInt n;
         UInt i;
         RCEC *p;

         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
         for (i = 0; i < N_RCEC_TAB; i++) {
            n = 0;
            for (p = contextTab[i]; p; p = p->next)
               n++;
            if (n < MAXCHAIN)
               chains[n]++;
            else
               chains[MAXCHAIN]++;
            if (n > 0)
               non0chain++;
         }
         VG_(printf)( "   libhb: contextTab chain of [length]=nchain."
                      " Avg chain len %3.1f\n"
                      "        ",
                      (Double)stats__ctxt_tab_curr
                      / (Double)(non0chain ? non0chain : 1));
         for (i = 0; i <= MAXCHAIN; i++) {
            if (chains[i] != 0)
               VG_(printf)( "[%d%s]=%d ",
                            i, i == MAXCHAIN ? "+" : "",
                            chains[i]);
         }
         VG_(printf)( "\n");
#        undef MAXCHAIN
      }
      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr)    = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

/* Receive notification that a thread has low level exited.  The
   significance here is that we do not expect to see any more memory
   references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen if the
      thread has both ll_exited and has been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we had better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Receive notification that a thread has been joined with.  The
   significance here is that we do not expect to see any further
   references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}


/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
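
/* Worked example (illustrative): if the SO already holds viR ==
   {T1:3, T2:1} and thread T2, whose viR is {T1:1, T2:4}, posts a weak
   send, the SO ends up with the join {T1:3, T2:4}; a strong send
   would instead overwrite the SO with exactly {T1:1, T2:4}.  Either
   way T2's own clocks are then ticked, eg viR becomes {T1:1, T2:5},
   so the send itself is an event which later receives can be ordered
   after. */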

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
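
/* A minimal sketch (an assumption about a caller, not library code)
   of the send/recv pairing as a tool might use it to model a mutex:
   the SO carries the happens-before edge from the unlocking thread to
   the next locker.  'so_for_mutex' is a hypothetical lookup from the
   guest mutex to its associated SO. */
#if 0
static void example_model_mutex_handover ( Thr* unlocker, Thr* locker,
                                           void* guest_mutex )
{
   SO* so = so_for_mutex(guest_mutex);   /* hypothetical map lookup */
   libhb_so_send( unlocker, so, True/*strong*/ );
   /* ... later, when the other thread acquires the mutex ... */
   libhb_so_recv( locker, so, True/*strong*/ );
}
#endif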

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}


/* Set the lines zix_start to zix_end (inclusive) to NOACCESS. */
static void zsm_secmap_line_range_noaccess (SecMap *sm,
                                            UInt zix_start, UInt zix_end)
{
   for (UInt lz = zix_start; lz <= zix_end; lz++) {
      LineZ* lineZ;
      LineF* lineF;
      lineZ = &sm->linesZ[lz];
      if (lineZ->dict[0] != SVal_INVALID) {
         rcdec_LineZ(lineZ);
      } else {
         UInt fix = (UInt)lineZ->dict[1];
         tl_assert(sm->linesF);
         tl_assert(sm->linesF_size > 0);
         tl_assert(fix >= 0 && fix < sm->linesF_size);
         lineF = &sm->linesF[fix];
         rcdec_LineF(lineF);
         lineF->inUse = False;
      }
      lineZ->dict[0] = SVal_NOACCESS;
      lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
         lineZ->ix2s[i] = 0; /* all refer to dict[0] */
   }
}

/* Set the given range to SVal_NOACCESS in-place in the secmap.
   a must be cacheline aligned.  len must be a multiple of a cacheline
   and must be < N_SECMAP_ARANGE. */
static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
{
   tl_assert (is_valid_scache_tag (a));
   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
   tl_assert (len < N_SECMAP_ARANGE);

   SecMap *sm1 = shmem__find_SecMap (a);
   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
   UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
   UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;

   if (sm1) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
      zsm_secmap_line_range_noaccess (sm1, zix_start,
                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
   }
   if (sm2 && sm1 != sm2) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
   }
}

/* Set the given address range to SVal_NOACCESS.
   SecMaps which become entirely SVal_NOACCESS are pushed onto
   SecMap_freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
      BPC = Before, Partial Cacheline, = addr
            (i.e. starting inside a cacheline/inside a SecMap)
      BFC = Before, Full Cacheline(s), but not full SecMap
            (i.e. starting inside a SecMap)
      FSM = Full SecMap(s)
            (i.e. starting a SecMap)
      AFC = After, Full Cacheline(s), but not full SecMap
            (i.e. first address after the full SecMap(s))
      APC = After, Partial Cacheline
            (i.e. first address after the full CacheLines)
      ARE = After Range End = addr+len = first address not part of the range.

      If addr     starts a Cacheline, then BPC == BFC.
      If addr     starts a SecMap,    then BPC == BFC == FSM.
      If addr+len starts a SecMap,    then APC == ARE == AFC.
      If addr+len starts a Cacheline, then APC == ARE.
   */
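
   /* Illustrative numbers (assuming N_LINE_ARANGE == 64 and
      N_SECMAP_ARANGE == 8192): for addr == 0x1F30, len == 0x6200,
      ARE == 0x8130 and the decomposition is
         BPC 0x1F30 len 0x10    (partial line, set via the cache)
         BFC 0x1F40 len 0xC0    (whole lines, set in-place in a SecMap)
         FSM 0x2000 len 0x6000  (three whole SecMaps, freed outright)
         AFC 0x8000 len 0x100   (whole lines, set in-place in a SecMap)
         APC 0x8100 len 0x30    (partial line, set via the cache)
      and the five lengths sum back to 0x6200. */
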
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;
   SizeT BFClen;
   SizeT FSMlen;
   SizeT AFClen;
   SizeT APClen;

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen. */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
   if (from < to) {                              \
      if (to - from < Plen)                      \
         consumed = to - from;                   \
      else                                       \
         consumed = ROUNDDN(Plen, round);        \
   } else {                                      \
      consumed = 0;                              \
   }                                             \
   Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1,               BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE,   BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE,   AFClen);
   PlenCONSUME(APC, ARE, 1,               APClen);

   if (0)
      VG_(printf) ("addr %p[%ld] ARE %p"
                   " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
                   " AFC %p[%ld] APC %p[%ld]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   tl_assert (Plen == 0);

   /* Set to NOACCESS the pieces before and after that are not covered
      by entire SecMaps. */

   /* First we set the partial cachelines.  This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore.  We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps onto the
         free list. */
      Addr sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               if (LIKELY(sm->linesZ[lz].dict[0] != SVal_INVALID))
                  rcdec_LineZ(&sm->linesZ[lz]);
            }
            for (UInt lf = 0; lf < sm->linesF_size; lf++) {
               if (sm->linesF[lf].inUse)
                  rcdec_LineF (&sm->linesF[lf]);
            }
            if (sm->linesF_size > 0) {
               HG_(free)(sm->linesF);
               stats__secmap_linesF_allocd -= sm->linesF_size;
               stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
            }
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have kept copies of freed SecMaps in the
         smCache.  => clear them. */
      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
         smCache[0].gaKey = 1;
         smCache[0].sm = NULL;
      }
      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
         smCache[1].gaKey = 1;
         smCache[1].sm = NULL;
      }
      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
         smCache[2].gaKey = 1;
         smCache[2].sm = NULL;
      }
      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
   }
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
}

/* Works byte at a time.  Can be optimised if needed. */
UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
{
   UWord anr = 0; // nr of bytes addressable.

   /* Get the accessibility of each byte.  Take care not to create a
      SecMap or a LineZ when checking whether a byte is addressable.

      Note: this is used for a client request, so performance is deemed
      not critical.  For simplicity we therefore work byte by byte.
      Performance could be improved by working with full cachelines
      or full SecMaps when reaching a cacheline or secmap boundary. */
   for (SizeT i = 0; i < len; i++) {
      SVal  sv = SVal_INVALID;
      Addr  b = a + i;
      Addr  tag = b & ~(N_LINE_ARANGE - 1);
      UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
      UWord cloff = get_cacheline_offset(b);

      /* Note: we do not use get_cacheline(b) to avoid creating cachelines
         and/or SecMaps for non addressable bytes. */
      if (tag == cache_shmem.tags0[wix]) {
         CacheLine copy = cache_shmem.lyns0[wix];
         /* We work on a copy of the cacheline, as we do not want to
            record the client request as a real read.
            The below is somewhat similar to zsm_sapply08__msmcread but
            avoids side effects on the cache. */
         UWord toff = get_tree_offset(b); /* == 0 .. 7 */
         UWord tno  = get_treeno(b);
         UShort descr = copy.descrs[tno];
         if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
            SVal* tree = &copy.svals[tno << 3];
            copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
         }
         sv = copy.svals[cloff];
      } else {
         /* Byte not found in the cacheline.  Search for a SecMap. */
         SecMap *sm = shmem__find_SecMap(b);
         LineZ *lineZ;
         if (sm == NULL)
            sv = SVal_NOACCESS;
         else {
            UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
            lineZ = &sm->linesZ[zix];
            if (lineZ->dict[0] == SVal_INVALID) {
               UInt fix = (UInt)lineZ->dict[1];
               sv = sm->linesF[fix].w64s[cloff];
            } else {
               UWord ix = read_twobit_array( lineZ->ix2s, cloff );
               sv = lineZ->dict[ix];
            }
         }
      }

      tl_assert (sv != SVal_INVALID);
      if (sv == SVal_NOACCESS) {
         if (abits)
            abits[i] = 0x00;
      } else {
         if (abits)
            abits[i] = 0xff;
         anr++;
      }
   }

   return anr;
}
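
/* A minimal usage sketch (an assumption about a caller, not library
   code): a client-request handler might fill a caller-supplied buffer
   with one accessibility byte per address, or pass NULL simply to
   count the addressable bytes in a block. */
#if 0
static void example_query_abits ( Addr block, SizeT szB )
{
   UChar* abits = HG_(zalloc)( "example.abits", szB );
   UWord  anr   = libhb_srange_get_abits( block, abits, szB );
   VG_(printf)("%lu of %lu bytes addressable\n", anr, (UWord)szB);
   HG_(free)(abits);
}
#endif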


void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
      (1) we have reached a significant nr of RCECs (to avoid scanning
      a contextTab that is mostly NULL ptrs),
      and (2) we are approaching the max nr of RCECs (we have in any
      case at least that many RCECs in the pool allocator).
      Note: the margin lets us avoid a small but constant increase of
      the max nr of RCECs due to the fact that libhb_maybe_GC is not
      called when the current nr of RCECs exactly reaches the max,
      and (3) the nr of referenced RCECs is less than 75% of the total
      nr of RCECs.
      Keeping the nr of RCECs from growing too much keeps memory use
      low, and avoids having too many elements in the (fixed)
      contextTab hashtable. */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
      do_RCEC_GC();

   /* If there are still no entries available (all the table entries
      are full) and we hit the threshold point, then do a GC */
   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
      && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
   if (UNLIKELY (vts_tab_GC))
      vts_tab__do_GC( False/*don't show stats*/ );

   /* scan GC the SecMaps when
      (1) no SecMap is in the freelist
      and (2) the current nr of live secmaps exceeds the threshold. */
   if (UNLIKELY(SecMap_freelist == NULL
                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
      // If we did a vts tab GC, then no need to flush the cache again.
      if (!vts_tab_GC)
         zsm_flush_cache();
      shmem__SecMap_do_GC(True);
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END main library                                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/