
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-defineable. */
#define SVal_NOACCESS (2ULL << 62)
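
/* Editorial note: the following is an illustrative sketch, not part
   of the original library; the function name is invented.  It shows
   that both distinguished values above are tags held in the top two
   bits of the 64-bit SVal, with the low 62 bits zero, so the two can
   never be confused with each other. */
#if 0
static void sval_tag_demo ( void )
{
   tl_assert((SVal_INVALID  >> 62) == 3);
   tl_assert((SVal_NOACCESS >> 62) == 2);
   tl_assert(SVal_INVALID != SVal_NOACCESS);
}
#endif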


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
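
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  It spells out the
   consequences of the 18/46 packing described above: a ScalarTS fits
   in one 64-bit word, and the largest representable ThrID and tick
   count follow directly from the field widths. */
#if 0
static void scalarts_packing_demo ( void )
{
   ScalarTS ts;
   tl_assert(SCALARTS_N_THRBITS + SCALARTS_N_TYMBITS == 64);
   tl_assert(sizeof(ScalarTS) == 8);             /* assumes no padding */
   ts.thrid = 1024;                              /* first valid ThrID (NB2) */
   ts.tym   = (1ULL << SCALARTS_N_TYMBITS) - 1;  /* largest tick count */
   tl_assert(ts.thrid <= ThrID_MAX_VALID);       /* 2^18 - 1 == 262143 */
}
#endif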


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB  (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if a R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
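
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name and example address are invented.  It
   walks through the address decomposition implied by the macros and
   the bit layout above: with FI_LINE_SZB_LOG2 == 5 a line covers 32
   bytes, and byte k of a line is tracked by the bit pair at bit
   2*(k & 7) of u16s[k >> 3]. */
#if 0
static void filter_addressing_demo ( void )
{
   Addr  a      = 0x100037;               /* arbitrary example address */
   Addr  tag    = FI_GET_TAG(a);          /* 0x100020, the line base */
   UWord lineno = FI_GET_LINENO(a);       /* (a >> 5) & 1023 == 1 */
   UWord k      = a - tag;                /* byte 23 within the line */
   UWord shft   = 2 * (k & 7);            /* bit pair at bit 14 ... */
   tl_assert(lineno < FI_NUM_LINES);
   tl_assert((k >> 3) < FI_LINE_SZB / 8); /* ... of u16s[2] */
   tl_assert((0x5555 & (1 << shft))       != 0); /* W bits are even bits */
   tl_assert((0xAAAA & (1 << (shft + 1))) != 0); /* R bits are odd bits */
}
#endif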

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8];  /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for globals needed by other parts of the library.  These are
   set once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals
   stored herein.  It is important to understand, however, that due
   to internal caching, the reference counts are in general
   inaccurate, and can be both above or below the true reference
   count for an item.  In particular, the library may indicate that
   the reference count for an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered
   to be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     &&       a < start + szB));
   return a - start < szB;
}
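
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  It shows why the single
   unsigned comparison above is enough: when a < start the
   subtraction wraps around to a huge value, which cannot be < szB,
   and when a >= start it is the exact distance from the range
   base. */
#if 0
static void address_in_range_demo ( void )
{
   tl_assert( address_in_range(0x1008, 0x1000, 0x10)); /* inside */
   tl_assert(!address_in_range(0x1010, 0x1000, 0x10)); /* end excluded */
   tl_assert(!address_in_range(0x0FFF, 0x1000, 0x10)); /* wraps to huge */
}
#endif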

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
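
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  A descriptor is a bitmap
   over the nodes of the implicit binary tree covering 8 bytes: a
   fully split tree is described by the eight 8-bit leaf bits, while
   a single 64-bit value needs only TREE_DESCR_64. */
#if 0
static void tree_descr_demo ( void )
{
   UShort all_bytes = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
                      | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
                      | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   tl_assert(all_bytes     == 0x7F80);  /* bits 14..7 */
   tl_assert(TREE_DESCR_64 == 0x0008);  /* bit 3 */
}
#endif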

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
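
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  It records the space
   saving that motivates the Z representation: with N_LINE_ARANGE ==
   64, a LineZ payload is a 4-entry dictionary (32 bytes) plus 64
   2-bit indexes (16 bytes), 48 bytes in all, whereas a LineF needs
   64 * 8 == 512 bytes.  Lines holding at most 4 distinct SVals --
   the common case -- therefore compress roughly 10:1. */
#if 0
static void linez_space_demo ( void )
{
   tl_assert(sizeof(((LineZ*)0)->dict) == 32);
   tl_assert(sizeof(((LineZ*)0)->ix2s) == 16);
   tl_assert(sizeof(((LineF*)0)->w64s) == 512);
}
#endif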

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
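
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  Following the comment
   above the Cache typedef, the whole cache can be made empty just by
   writing an unaligned value into every tag; every lookup then
   misses, and such lines are recognisably bogus and never written
   back. */
#if 0
static void make_cache_empty_demo ( Cache* c )
{
   Word i;
   for (i = 0; i < N_WAY_NENT; i++)
      c->tags0[i] = 1;  /* 1 % 8 != 0, hence never a valid tag */
   tl_assert(!is_valid_scache_tag(c->tags0[0]));
}
#endif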


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # scan GCs done
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # vts_tab GCs done

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
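
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name and example address are invented.  It
   shows how a guest address splits on its way into shadow memory:
   with N_SECMAP_BITS == 13 and N_LINE_BITS == 6, a SecMap covers
   8192 bytes and holds N_SECMAP_ZLINES == 128 line-sized chunks. */
#if 0
static void shmem_decompose_demo ( void )
{
   Addr  a    = 0x403FC7;
   Addr  base = shmem__round_to_SecMap_base(a); /* 0x402000 */
   UWord off  = shmem__get_SecMap_offset(a);    /* 0x1FC7 */
   UWord zix  = off >> N_LINE_BITS;             /* LineZ index 127 */
   tl_assert(base + off == a);
   tl_assert(zix < N_SECMAP_ZLINES);
}
#endif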


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have changed to be fully SVal_NOACCESS are inserted
   in a list of recycled SecMaps.  When a new SecMap is needed, a
   recycled SecMap will be used in preference to allocating a new
   one. */
/* We make a linked list of SecMaps.  The linesF pointer is re-used
   to implement the linked list. */
static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = (SecMap*)sm->linesF;
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   sm->linesF = (LineF*)SecMap_freelist;
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = (SecMap*)sm->linesF;
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many could be GC-ed.
   If 'really' is True, actually GC them. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         ok_to_GC = lineZ->dict[0] == SVal_INVALID
            || (lineZ->dict[0] == SVal_NOACCESS
                && !SVal__isC (lineZ->dict[1])
                && !SVal__isC (lineZ->dict[2])
                && !SVal__isC (lineZ->dict[3]));
      }
      /* Deal with the LineFs */
      for (i = 0; i < sm->linesF_size && ok_to_GC; i++) {
         LineF* lineF = &sm->linesF[i];
         if (!lineF->inUse)
            continue;
         for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
            ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS or
            not in use.  We just need to free the linesF. */
         if (sm->linesF_size > 0) {
            HG_(free)(sm->linesF);
            stats__secmap_linesF_allocd -= sm->linesF_size;
            stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%.  We then allow 10% more to be
         allocated before GCing.  This avoids doing a lot of costly GC
         for the worst case: the 'growing phase' of an application
         that allocates a lot of memory.
         The worst case can be reproduced e.g. by
           perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
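
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  The two-bit array packs
   four dictionary indexes per byte, lowest index in the
   lowest-numbered bit pair, exactly as the helpers above encode. */
#if 0
static void twobit_array_demo ( void )
{
   UChar arr[2] = { 0, 0 };
   write_twobit_array( arr, 0, 3 );
   write_twobit_array( arr, 3, 2 );
   write_twobit_array( arr, 5, 1 );
   tl_assert(read_twobit_array( arr, 0 ) == 3); /* arr[0], bits 1..0 */
   tl_assert(read_twobit_array( arr, 3 ) == 2); /* arr[0], bits 7..6 */
   tl_assert(read_twobit_array( arr, 5 ) == 1); /* arr[1], bits 3..2 */
   tl_assert(read_twobit_array( arr, 1 ) == 0); /* untouched pairs stay 0 */
}
#endif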

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
      case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
         return BYTE(1,1,1,1,1,1,1,1);
      case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
         return BYTE(1,1,0,1,1,1,1,1);
      case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
         return BYTE(0,1,1,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
         return BYTE(0,1,0,1,1,1,1,1);

      case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
         return BYTE(1,1,1,1,1,1,0,1);
      case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
         return BYTE(1,1,0,1,1,1,0,1);
      case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
         return BYTE(0,1,1,1,1,1,0,1);
      case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
         return BYTE(0,1,0,1,1,1,0,1);

      case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
         return BYTE(1,1,1,1,0,1,1,1);
      case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
         return BYTE(1,1,0,1,0,1,1,1);
      case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
         return BYTE(0,1,1,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
         return BYTE(0,1,0,1,0,1,1,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
         return BYTE(1,1,1,1,0,1,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
         return BYTE(1,1,0,1,0,1,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
         return BYTE(0,1,1,1,0,1,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
         return BYTE(0,1,0,1,0,1,0,1);

      case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
         return BYTE(0,0,0,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
         return BYTE(0,0,0,1,1,1,0,1);
      case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
         return BYTE(0,0,0,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
         return BYTE(0,0,0,1,0,1,0,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
         return BYTE(1,1,1,1,0,0,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
         return BYTE(1,1,0,1,0,0,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
         return BYTE(0,1,1,1,0,0,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
         return BYTE(0,1,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
         return BYTE(0,0,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
         return BYTE(0,0,0,0,0,0,0,1);

      default: return BYTE(0,0,0,0,0,0,0,0);
               /* INVALID - any valid descr produces at least one
                  valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
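
/* Editorial note: an illustrative sketch, not part of the original
   library; the function name is invented.  Normalisation merges
   equal siblings bottom-up: eight equal leaves collapse through the
   16- and 32-bit layers into a single 64-bit node, leaving only
   tree[0] live and the other slots SVal_INVALID. */
#if 0
static void normalise_tree_demo ( void )
{
   Word i;
   SVal tree[8];
   for (i = 0; i < 8; i++)
      tree[i] = SVal_NOACCESS;            /* all 8 bytes identical */
   tl_assert(normalise_tree( tree ) == TREE_DESCR_64);
   for (i = 1; i < 8; i++)
      tl_assert(tree[i] == SVal_INVALID); /* only the root survives */
}
#endif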
1358
1359/* This takes a cacheline where all the data is at the leaves
1360 (w8[..]) and builds a correctly normalised tree. */
1361static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1362{
1363 Word tno, cloff;
1364 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1365 SVal* tree = &cl->svals[cloff];
1366 cl->descrs[tno] = normalise_tree( tree );
1367 }
1368 tl_assert(cloff == N_LINE_ARANGE);
sewardj8f5374e2008-12-07 11:40:17 +00001369 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001370 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1371 stats__cline_normalises++;
1372}
1373
1374
1375typedef struct { UChar count; SVal sval; } CountedSVal;
1376
1377static
1378void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1379 /*OUT*/Word* dstUsedP,
1380 Word nDst, CacheLine* src )
1381{
1382 Word tno, cloff, dstUsed;
1383
1384 tl_assert(nDst == N_LINE_ARANGE);
1385 dstUsed = 0;
1386
1387 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1388 UShort descr = src->descrs[tno];
1389 SVal* tree = &src->svals[cloff];
1390
1391 /* sequentialise the tree described by (descr,tree). */
1392# define PUT(_n,_v) \
1393 do { dst[dstUsed ].count = (_n); \
1394 dst[dstUsed++].sval = (_v); \
1395 } while (0)
1396
1397 /* byte 0 */
1398 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1399 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1400 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1401 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1402 /* byte 1 */
1403 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1404 /* byte 2 */
1405 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1406 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1407 /* byte 3 */
1408 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1409 /* byte 4 */
1410 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1411 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1412 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1413 /* byte 5 */
1414 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1415 /* byte 6 */
1416 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1417 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1418 /* byte 7 */
1419 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1420
1421# undef PUT
1422 /* END sequentialise the tree described by (descr,tree). */
1423
1424 }
1425 tl_assert(cloff == N_LINE_ARANGE);
1426 tl_assert(dstUsed <= nDst);
1427
1428 *dstUsedP = dstUsed;
1429}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8, 4, 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}
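
/* Illustrative note (an addition, not original commentary): the Z
   ("zipped") representation above relies on most lines containing at
   most 4 distinct shadow values.  Suppose, hypothetically, that
   N_LINE_ARANGE == 64 and a line holds SVal A in bytes 0..31 and
   SVal B in bytes 32..63.  Then the loop above produces

      lineZ->dict == { A, B, SVal_INVALID, SVal_INVALID }

   and lineZ->ix2s holds 64 two-bit dictionary indices: 32 zeroes
   followed by 32 ones.  So 64 SVals shrink to a 4-entry dictionary
   plus 16 bytes of indices.  A line with 5 or more distinct values
   takes the fallback path above: dict[0] is set to SVal_INVALID and
   dict[1] is overloaded to hold the index of the F ("full") line
   that stores all N_LINE_ARANGE values verbatim. */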

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
         cl->svals[i] = lineZ->dict[ix];
         if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

/* Invalidate the cachelines corresponding to the given range, which
   must start and end on a cacheline boundary. */
static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
{
   Word wix;

   /* ga must be on a cacheline boundary. */
   tl_assert (is_valid_scache_tag (ga));
   /* szB must be a multiple of cacheline size. */
   tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));

   Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
   Word nwix = szB / N_LINE_ARANGE;

   if (nwix > N_WAY_NENT)
      nwix = N_WAY_NENT; // no need to check the same entry several times.

   for (wix = 0; wix < nwix; wix++) {
      if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
         cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
      ga_ix++;
      if (ga_ix == N_WAY_NENT)
         ga_ix = 0;
   }
}
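
/* Illustrative note: the cache is direct-mapped with N_WAY_NENT
   entries, and consecutive cache lines map to consecutive entries
   modulo N_WAY_NENT.  Hence, for a range spanning more than
   N_WAY_NENT lines, every entry is a candidate and a single sweep
   suffices -- which is why nwix is clamped above.  The
   address_in_range test then skips entries whose resident line lies
   outside [ga, ga+szB). */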


static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
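
/* Worked example (assuming, hypothetically, N_LINE_BITS == 6, so
   N_LINE_ARANGE == 64): for a == 0x1234567B,

      get_cacheline_offset(a) == 0x3B  (byte 59 within the line)
      get_treeno(a)           == 0x3B >> 3 == 7  (the 8th 8-byte tree)
      get_tree_offset(a)      == 0x1234567B & 7 == 3  (byte 3 of that tree)

   and the line's tag is 'a' with the low 6 bits cleared: 0x12345640. */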

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr* tag_old_p;
   Addr tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
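
/* Illustrative trace: starting from a fully-pulled-up tree
   (descr == TREE_DESCR_64), an 8-bit access at tree offset 5 calls
   pulldown_to_8(tree, 5, descr), which splits recursively:

      pulldown_to_32(tree, 4, _): 64   -> 32_0 | 32_1  (tree[4] = tree[0])
      pulldown_to_16(tree, 4, _): 32_1 -> 16_2 | 16_3  (tree[6] = tree[4])
      pulldown_to_8 itself:       16_2 -> 8_4  | 8_5   (tree[5] = tree[4])

   leaving descr == 32_0 | 16_3 | 8_4 | 8_5, with tree[5] now an
   independently updatable 8-bit leaf. */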


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   /* Invalidate all cache entries. */
   tl_assert(!is_valid_scache_tag(1));
   for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) tables.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.
   verydead_thread_table_not_pruned lists the identity of the threads
   that died since the previous round of pruning.
   Once pruning is done, these ThrIDs are added to verydead_thread_table.
   We don't actually need to keep the set of threads that have ever died --
   only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_tables_init ( void )
{
   tl_assert(!verydead_thread_table);
   tl_assert(!verydead_thread_table_not_pruned);
   verydead_thread_table
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.1",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   verydead_thread_table_not_pruned
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.2",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}

static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   UWord i;

   VG_(sortXA)( thrids );
   /* Sanity check: check for unique .sts.thr values. */
   UWord nBT = VG_(sizeXA)( thrids );
   if (nBT > 0) {
      ThrID thrid1, thrid2;
      thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
      for (i = 1; i < nBT; i++) {
         thrid1 = thrid2;
         thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
         tl_assert(thrid1 < thrid2);
      }
   }
   /* Ok, so the dead thread table 'thrids' has unique and in-order keys. */
}

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;
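
/* Illustrative note: a VTS is a sparse vector clock.  Conceptually it
   maps every thread to a scalar timestamp, but only nonzero entries
   are stored, as (thrid, tym) pairs sorted by thrid.  So a clock that
   is 3 for a thread T1 and 7 for a thread T3, and zero everywhere
   else, is stored as

      usedTS == 2,  ts == [ (T1,3), (T3,7) ]

   (thread names hypothetical).  ts[0] is the zero-length-array idiom:
   the ScalarTS entries live inline at the end of the struct, sized at
   allocation time by VTS__new below. */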

/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thrids' removed.  The new
   array is sized exactly to hold the number of required elements.
   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS. */
static void VTS__show ( const VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  'vts' itself is not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
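
/* Worked example (thread names hypothetical), with me == T2:

      VTS__tick(out, T2, [ (T1,3), (T2,5), (T4,1) ])
         =>  out == [ (T1,3), (T2,6), (T4,1) ]    -- case (c)

      VTS__tick(out, T2, [ (T1,3), (T4,1) ])
         =>  out == [ (T1,3), (T2,1), (T4,1) ]    -- case (b); T2's
                                                     entry was an
                                                     implicit zero. */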


/* Create in 'out' a VTS which is the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
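
/* Worked example: the join is the pointwise maximum, treating absent
   entries as zero (thread names hypothetical):

      VTS__join(out, [ (T1,4), (T2,1) ], [ (T1,3), (T3,9) ])
         =>  out == [ (T1,4), (T2,1), (T3,9) ]

   Only T1 occurs in both inputs, so ncommon == 1 and the final
   assertion holds: out->usedTS == 2 + 2 - 1 == 3. */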


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
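
/* Worked example: LEQ(A,B) holds iff A[t] <= B[t] for every thread t,
   absent entries again being implicit zeroes (thread names
   hypothetical):

      A == [ (T1,2), (T2,5) ], B == [ (T1,3), (T2,5), (T3,1) ]
         =>  VTS__cmpLEQ(A,B) == 0     -- A <= B at every point

      A == [ (T1,2), (T2,7) ], B == [ (T1,3), (T2,5) ]
         =>  VTS__cmpLEQ(A,B) == T2    -- A[T2] == 7 > 5 == B[T2]

   In happens-before terms, a zero result means the event carrying
   clock A happens before (or coincides with) the event carrying
   clock B. */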


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across likely
      differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
   Word i, n;
   tl_assert(vts);

   VG_(printf)("[");
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      const ScalarTS *st = &vts->ts[i];
      VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
   }
   VG_(printf)("]");
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ: tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that.). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
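
/* Illustrative note: this is hash-consing (interning).  Every live
   VTS is kept in vts_set, keyed by structural equality via
   VTS__cmp_structural, so structurally identical vector clocks are
   represented by one shared object.  That sharing is what makes it
   meaningful elsewhere to compare the small VtsID handles rather than
   whole vector clocks. */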


/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for u.freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so u.freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      union {
         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
         VtsID remap;    /* used only during pruning, for used entries */
      } u;
      /* u.freelink only used when vts == NULL,
         u.remap only used when vts != NULL, during pruning. */
   }
   VtsTE;
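
/* Illustrative note: the union saves a word per entry by exploiting
   the invariants above.  A free entry (vts == NULL) threads through
   the freelist via u.freelink; a live entry (vts != NULL) never needs
   a freelink, so during pruning the same storage is reused as
   u.remap, the entry's index in the post-pruning table.  The two
   roles are never needed at the same time. */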

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                         HG_(free), sizeof(VtsTE) );
   vts_tab_freelist = VtsID_INVALID;
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->u.freelink == VtsID_INVALID);
   ie->u.freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->u.freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.u.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->u.freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}


static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->u.remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;
2995
2996 if (show_stats) {
2997 show_vts_stats("after GC");
2998 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2999 }
3000
sewardj5e2ac3b2009-08-11 10:39:25 +00003001 if (VG_(clo_stats)) {
sewardjf98e1c02008-10-25 16:22:41 +00003002 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00003003 VG_(message)(Vg_DebugMsg,
philippef54cb662015-05-10 22:19:31 +00003004 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3005 stats__vts_tab_GC,
3006 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00003007 }
sewardjffce8152011-06-24 10:09:41 +00003008 /* ---------- END VTS GC ---------- */
3009
3010 /* Decide whether to do VTS pruning. We have one of three
3011 settings. */
3012 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3013
3014 Bool do_pruning = False;
3015 switch (HG_(clo_vts_pruning)) {
3016 case 0: /* never */
3017 break;
3018 case 1: /* auto */
3019 do_pruning = (++pruning_auto_ctr % 5) == 0;
3020 break;
3021 case 2: /* always */
3022 do_pruning = True;
3023 break;
3024 default:
3025 tl_assert(0);
3026 }
3027
3028 /* The rest of this routine only handles pruning, so we can
3029 quit at this point if it is not to be done. */
3030 if (!do_pruning)
3031 return;
philippec3508652015-03-28 12:01:58 +00003032 /* No need to do pruning if no thread died since the last pruning as
3033 no VtsTE can be pruned. */
3034 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3035 return;
sewardjffce8152011-06-24 10:09:41 +00003036
3037 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00003038 /* Sort and check the very dead threads that died since the last pruning.
3039 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00003040 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00003041 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003042
3043 /* We will run through the old table, and create a new table and
philippea1ac2f42015-05-01 17:12:00 +00003044 set, at the same time setting the u.remap entries in the old
sewardjffce8152011-06-24 10:09:41 +00003045 table to point to the new entries. Then, visit every VtsID in
3046 the system, and replace all of them with new ones, using the
philippea1ac2f42015-05-01 17:12:00 +00003047 u.remap entries in the old table. Finally, we can delete the old
sewardjffce8152011-06-24 10:09:41 +00003048 table and set. */
3049
3050 XArray* /* of VtsTE */ new_tab
3051 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3052 HG_(free), sizeof(VtsTE) );
3053
3054 /* WordFM VTS* void */
3055 WordFM* new_set
3056 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3057 HG_(free),
3058 (Word(*)(UWord,UWord))VTS__cmp_structural );
3059
3060 /* Visit each old VTS. For each one:
3061
3062 * make a pruned version
3063
3064 * search new_set for the pruned version, yielding either
3065 Nothing (not present) or the new VtsID for it.
3066
3067 * if not present, allocate a new VtsID for it, insert (pruned
3068 VTS, new VtsID) in the tree, and set
3069 remap_table[old VtsID] = new VtsID.
3070
3071 * if present, set remap_table[old VtsID] = new VtsID, where
3072 new VtsID was determined by the tree lookup. Then free up
3073 the clone.
3074 */
3075
3076 UWord nBeforePruning = 0, nAfterPruning = 0;
3077 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3078 VtsID new_VtsID_ctr = 0;
3079
3080 for (i = 0; i < nTab; i++) {
3081
3082 /* For each old VTS .. */
3083 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3084 VTS* old_vts = old_te->vts;
sewardjffce8152011-06-24 10:09:41 +00003085
3086 /* Skip it if not in use */
3087 if (old_te->rc == 0) {
3088 tl_assert(old_vts == NULL);
3089 continue;
3090 }
philippea1ac2f42015-05-01 17:12:00 +00003091 tl_assert(old_te->u.remap == VtsID_INVALID);
sewardjffce8152011-06-24 10:09:41 +00003092 tl_assert(old_vts != NULL);
3093 tl_assert(old_vts->id == i);
3094 tl_assert(old_vts->ts != NULL);
3095
3096 /* It is in use. Make a pruned version. */
3097 nBeforePruning++;
3098 nSTSsBefore += old_vts->usedTS;
3099 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00003100 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003101 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3102 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3103 == 0x0ddC0ffeeBadF00dULL);
3104
3105 /* Get rid of the old VTS and the tree entry. It's a bit more
3106 complex to incrementally delete the VTSs now than to nuke
3107 them all after we're done, but the upside is that we don't
3108 wind up temporarily storing potentially two complete copies
3109 of each VTS and hence spiking memory use. */
3110 UWord oldK = 0, oldV = 12345;
3111 Bool present = VG_(delFromFM)( vts_set,
3112 &oldK, &oldV, (UWord)old_vts );
3113 tl_assert(present); /* else it isn't in vts_set ?! */
3114 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3115 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3116 /* now free the VTS itself */
3117 VTS__delete(old_vts);
3118 old_te->vts = NULL;
3119 old_vts = NULL;
3120
3121 /* NO MENTIONS of old_vts allowed beyond this point. */
3122
3123 /* Ok, we have the pruned copy in new_vts. See if a
3124 structurally identical version is already present in new_set.
3125 If so, delete the one we just made and move on; if not, add
3126 it. */
3127 VTS* identical_version = NULL;
3128 UWord valW = 12345;
3129 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3130 (UWord)new_vts)) {
3131 // already have it
3132 tl_assert(valW == 0);
3133 tl_assert(identical_version != NULL);
3134 tl_assert(identical_version != new_vts);
3135 VTS__delete(new_vts);
3136 new_vts = identical_version;
3137 tl_assert(new_vts->id != VtsID_INVALID);
3138 } else {
3139 tl_assert(valW == 12345);
3140 tl_assert(identical_version == NULL);
3141 new_vts->id = new_VtsID_ctr++;
3142 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3143 tl_assert(!b);
3144 VtsTE new_te;
3145 new_te.vts = new_vts;
3146 new_te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00003147 new_te.u.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003148 Word j = VG_(addToXA)( new_tab, &new_te );
3149 tl_assert(j <= i);
3150 tl_assert(j == new_VtsID_ctr - 1);
3151 // stats
3152 nAfterPruning++;
3153 nSTSsAfter += new_vts->usedTS;
3154 }
philippea1ac2f42015-05-01 17:12:00 +00003155 old_te->u.remap = new_vts->id;
sewardjffce8152011-06-24 10:09:41 +00003156
3157 } /* for (i = 0; i < nTab; i++) */
3158
philippec3508652015-03-28 12:01:58 +00003159 /* Move very dead thread from verydead_thread_table_not_pruned to
3160 verydead_thread_table. Sort and check verydead_thread_table
3161 to verify a thread was reported very dead only once. */
3162 {
3163 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3164
3165 for (i = 0; i < nBT; i++) {
3166 ThrID thrid =
3167 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3168 VG_(addToXA)( verydead_thread_table, &thrid );
3169 }
3170 verydead_thread_table_sort_and_check (verydead_thread_table);
3171 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3172 }
3173
sewardjffce8152011-06-24 10:09:41 +00003174 /* At this point, we have:
philippea1ac2f42015-05-01 17:12:00 +00003175 * the old VTS table, with its u.remap entries set,
sewardjffce8152011-06-24 10:09:41 +00003176 and with all .vts == NULL.
3177 * the old VTS tree should be empty, since it and the old VTSs
3178 it contained have been incrementally deleted was we worked
3179 through the old table.
philippea1ac2f42015-05-01 17:12:00 +00003180 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
sewardjffce8152011-06-24 10:09:41 +00003181 == VtsID_INVALID.
3182 * the new VTS tree.
3183 */
3184 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3185
3186 /* Now actually apply the mapping. */
3187 /* Visit all the VtsIDs in the entire system. Where do we expect
3188 to find them?
3189 (a) in shadow memory -- the LineZs and LineFs
3190 (b) in our collection of struct _Thrs.
3191 (c) in our collection of struct _SOs.
3192 Nowhere else, AFAICS. Not in the zsm cache, because that just
3193 got invalidated.
3194
philippea1ac2f42015-05-01 17:12:00 +00003195 Using the u.remap fields in vts_tab, map each old VtsID to a new
sewardjffce8152011-06-24 10:09:41 +00003196 VtsID. For each old VtsID, dec its rc; and for each new one,
3197 inc it. This sets up the new refcounts, and it also gives a
3198 cheap sanity check of the old ones: all old refcounts should be
3199 zero after this operation.
3200 */
3201
3202 /* Do the mappings for (a) above: iterate over the Primary shadow
3203 mem map (WordFM Addr SecMap*). */
3204 UWord secmapW = 0;
3205 VG_(initIterFM)( map_shmem );
3206 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3207 UWord j;
3208 SecMap* sm = (SecMap*)secmapW;
3209 tl_assert(sm->magic == SecMap_MAGIC);
3210 /* Deal with the LineZs */
3211 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3212 LineZ* lineZ = &sm->linesZ[i];
3213 if (lineZ->dict[0] == SVal_INVALID)
3214 continue; /* not in use -- data is in F rep instead */
3215 for (j = 0; j < 4; j++)
3216 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3217 }
3218 /* Deal with the LineFs */
3219 for (i = 0; i < sm->linesF_size; i++) {
3220 LineF* lineF = &sm->linesF[i];
3221 if (!lineF->inUse)
3222 continue;
3223 for (j = 0; j < N_LINE_ARANGE; j++)
3224 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3225 }
3226 }
3227 VG_(doneIterFM)( map_shmem );
3228
3229 /* Do the mappings for (b) above: visit our collection of struct
3230 _Thrs. */
3231 Thread* hgthread = get_admin_threads();
3232 tl_assert(hgthread);
3233 while (hgthread) {
3234 Thr* hbthr = hgthread->hbthr;
3235 tl_assert(hbthr);
3236 /* Threads that are listed in the prunable set have their viR
3237 and viW set to VtsID_INVALID, so we can't mess with them. */
3238 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3239 tl_assert(hbthr->viR == VtsID_INVALID);
3240 tl_assert(hbthr->viW == VtsID_INVALID);
3241 hgthread = hgthread->admin;
3242 continue;
3243 }
3244 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3245 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3246 hgthread = hgthread->admin;
3247 }
3248
3249 /* Do the mappings for (c) above: visit the struct _SOs. */
3250 SO* so = admin_SO;
3251 while (so) {
3252 if (so->viR != VtsID_INVALID)
3253 remap_VtsID( vts_tab, new_tab, &so->viR );
3254 if (so->viW != VtsID_INVALID)
3255 remap_VtsID( vts_tab, new_tab, &so->viW );
3256 so = so->admin_next;
3257 }
3258
3259 /* So, we're nearly done (with this incredibly complex operation).
3260 Check the refcounts for the old VtsIDs all fell to zero, as
3261 expected. Any failure is serious. */
3262 for (i = 0; i < nTab; i++) {
3263 VtsTE* te = VG_(indexXA)( vts_tab, i );
3264 tl_assert(te->vts == NULL);
3265 /* This is the assert proper. Note we're also asserting
philippea1ac2f42015-05-01 17:12:00 +00003266 zeroness for old entries which are unmapped. That's OK. */
sewardjffce8152011-06-24 10:09:41 +00003267 tl_assert(te->rc == 0);
3268 }
3269
3270 /* Install the new table and set. */
3271 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3272 vts_set = new_set;
3273 VG_(deleteXA)( vts_tab );
3274 vts_tab = new_tab;
3275
3276 /* The freelist of vts_tab entries is empty now, because we've
3277 compacted all of the live entries at the low end of the
3278 table. */
3279 vts_tab_freelist = VtsID_INVALID;
3280
3281 /* Sanity check vts_set and vts_tab. */
3282
3283 /* Because all the live entries got slid down to the bottom of vts_tab: */
3284 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3285
3286 /* Assert that the vts_tab and vts_set entries point at each other
3287 in the required way */
3288 UWord wordK = 0, wordV = 0;
3289 VG_(initIterFM)( vts_set );
3290 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3291 tl_assert(wordK != 0);
3292 tl_assert(wordV == 0);
3293 VTS* vts = (VTS*)wordK;
3294 tl_assert(vts->id != VtsID_INVALID);
3295 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3296 tl_assert(te->vts == vts);
3297 }
3298 VG_(doneIterFM)( vts_set );
3299
3300 /* Also iterate over the table, and check each entry is
3301 plausible. */
3302 nTab = VG_(sizeXA)( vts_tab );
3303 for (i = 0; i < nTab; i++) {
3304 VtsTE* te = VG_(indexXA)( vts_tab, i );
3305 tl_assert(te->vts);
3306 tl_assert(te->vts->id == i);
3307 tl_assert(te->rc > 0); /* 'cos we just GC'd */
philippea1ac2f42015-05-01 17:12:00 +00003308 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3309 /* value of te->u.remap not relevant */
sewardjffce8152011-06-24 10:09:41 +00003310 }
3311
3312 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3313 if (VG_(clo_stats)) {
3314 static UInt ctr = 1;
3315 tl_assert(nTab > 0);
3316 VG_(message)(
3317 Vg_DebugMsg,
3318 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3319 "after %lu (avg sz %lu)\n",
3320 ctr++,
3321 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3322 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3323 );
3324 }
sewardjffce8152011-06-24 10:09:41 +00003325 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003326}
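
/* Illustrative sketch (not part of the tool): the next-GC threshold
   policy used above, in isolation.  The helper name is hypothetical;
   it assumes nothing beyond UWord arithmetic.  Kept under #if 0,
   following this file's convention for optional code. */
#if 0
static UWord example_next_gc_threshold ( UWord nTab, UWord nFreed )
{
   /* Mirror of the logic above: let the number of live VTSs double
      before GCing again, but never set the threshold below the
      current (unshrinkable) table size.  E.g. nTab=1000, nFreed=900
      gives nLive=100, so next=200 is clamped back up to 1000. */
   UWord nLive = nTab - nFreed;
   UWord next  = 2 * nLive;
   if (next < nTab) next = nTab;
   return next;
}
#endif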


/////////////////////////////////////////////////////////
//                                                     //
//                      Vts IDs                        //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A temporary, max-sized VTS which is used as a temporary (the first
   argument) in VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////
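
/* Illustrative sketch: the cmpLEQ/join2 caches above are plain
   direct-mapped caches -- one probe, and the slot is simply
   overwritten on a miss.  A minimal standalone version of the same
   lookup pattern (names are hypothetical), under #if 0 as usual: */
#if 0
typedef struct { VtsID vi1; VtsID vi2; Bool leq; } ExampleEnt;
static ExampleEnt example_cache[N_CMPLEQ_CACHE];

static Bool example_cache_lookup ( VtsID vi1, VtsID vi2, /*OUT*/Bool* leq )
{
   UInt h = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (example_cache[h].vi1 == vi1 && example_cache[h].vi2 == vi2) {
      *leq = example_cache[h].leq;   /* hit */
      return True;
   }
   return False;  /* miss: caller computes the answer and stores it */
}
#endif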

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( vts );
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
   any, really) element in vi1 which is pointwise greater-than the
   corresponding element in vi2.  If no such element exists, return
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}
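
/* Why 1 is an impossible tag: tags produced by FI_GET_TAG are
   addresses rounded down to a filter-line boundary, hence always
   line-aligned, whereas 1 is not.  A sketch of the invariant
   (assumes only that FI_LINE_SZB is a power of two, which the
   division by 8 elsewhere in this file already relies on): */
#if 0
static void example_tag_invariant ( Addr a )
{
   Addr atag = FI_GET_TAG(a);
   tl_assert((atag & (FI_LINE_SZB - 1)) == 0);  /* line-aligned */
   tl_assert(atag != 1);                        /* so 1 never matches */
}
#endif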

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
   //VG_(printf)("%lu ", len);
   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}
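
/* The filter keeps 2 state bits (R and W) per byte, 16 bits per
   8-byte chunk, so a 1-byte access at address 'a' owns the bit pair
   at position (a & 7).  A worked sketch of the mask arithmetic used
   by the clear and skip handlers (the helper name is hypothetical): */
#if 0
static UShort example_byte_mask ( Addr a )
{
   /* a & 7 == 0 -> 0x0003, == 1 -> 0x000C, ... == 7 -> 0xC000,
      matching the "mask is C000, 3000, ..." comments above. */
   return 0x3 << (2 * (a & 7));
}
#endif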

/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 1 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}
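
/* Sketch of how an instrumentation path might consult these
   handlers: if the filter says the relevant R bits were already set,
   the full state-machine check can be skipped.  'msm_read' is a
   hypothetical stand-in for the real slow-path handler, used here
   only to make the calling pattern concrete: */
#if 0
static void example_on_read32 ( Thr* thr, Addr a )
{
   if (Filter__ok_to_skip_crd32( thr->filter, a ))
      return;          /* seen since the last filter clear; skip */
   msm_read( a, 4 );   /* hypothetical slow-path call */
}
#endif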


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Threads                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}
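
/* The ThrID <-> Thr* mapping is just "array index + 1024": slot 0 of
   thrid_to_thr_map holds the Thr* for thrid 1024, slot 1 for 1025,
   and so on.  A sketch of the round trip the assertions above rely
   on (illustrative only): */
#if 0
static void example_thrid_roundtrip ( void )
{
   Thr*  thr = Thr__new();
   ThrID tid = Thr__to_ThrID( thr );
   tl_assert(tid >= 1024);
   tl_assert(Thr__from_ThrID( tid ) == thr);
}
#endif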

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
                                       const ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                    Shadow Values                    //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
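
/* Sketch of the C-tag round trip: because valid VtsIDs fit in 30
   bits (per the layout diagram above), packing leaves the top two
   tag bits 00, so SVal__isC holds and both halves unpack unchanged.
   Illustrative only: */
#if 0
static void example_sval_roundtrip ( VtsID rmin, VtsID wmin )
{
   SVal s = SVal__mkC( rmin, wmin );
   tl_assert(SVal__isC(s));
   tl_assert(SVal__unC_Rmin(s) == rmin);
   tl_assert(SVal__unC_Wmin(s) == wmin);
}
#endif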

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
__attribute__((unused))
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                  Change-event map2                  //
//                                                     //
/////////////////////////////////////////////////////////

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef in the SparseWA is also on a doubly
      linked list maintaining the order in which the OldRefs were most
      recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is an MTF (move-to-front)
      array.  If a binding falls off the end, that's too bad -- we
      will lose info about that triple's access to this location.

      We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
      Then we do exact LRU discarding.  For each discarded OldRef we
      must of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is caused by the fixed size of this array.  I
   suspect for most OldRefs, only a few entries are used, but for a
   minority of cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a PoolAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1 = 0;
static UWord stats__ctxt_rcdec2 = 0;
static UWord stats__ctxt_rcdec3 = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): A hash table of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash; /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

//////////// BEGIN RCEC pool allocator
static PoolAlloc* rcec_pool_allocator;
static RCEC* alloc_RCEC ( void ) {
   return VG_(allocEltPA) ( rcec_pool_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
//////////// END RCEC pool allocator

static RCEC** contextTab = NULL; /* hash table of RCEC*s */

/* Count of allocated RCECs having ref count > 0 */
static UWord RCEC_referenced = 0;

/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return  1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return  1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
   if (ec->rc == 0)
      RCEC_referenced--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   if (ec->rc == 0)
      RCEC_referenced++;
   ec->rc++;
}


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
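
/* The move-one-step-forward trick is the classic "transpose"
   self-organising-list heuristic: a hit swaps the found element with
   its predecessor, so frequently searched RCECs migrate towards the
   head of their hash chain over time.  A sketch of the same swap on
   a minimal list type (hypothetical names, not the RCEC type): */
#if 0
typedef struct ExampleNode { struct ExampleNode* next; UWord key; }
        ExampleNode;
static void example_transpose_after_hit ( ExampleNode** headp,
                                          ExampleNode* prev2,
                                          ExampleNode* prev,
                                          ExampleNode* hit )
{
   /* On entry: prev->next == hit, and prev2->next == prev, with
      prev2 == NULL when prev is the head.  Afterwards 'hit' sits
      one step closer to the head, as in the real code above. */
   if (prev == NULL) return;          /* hit is already the head */
   prev->next = hit->next;
   hit->next  = prev;
   if (prev2 == NULL) *headp = hit; else prev2->next = hit;
}
#endif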


/* Find the given RCEC in the tree, and return a pointer to it.  Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy.  The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}

static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);
   w = (w << n) | (w >> (bpw-n));
   return w;
}

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc = 0;
   example.rcX = 0;
   example.next = NULL;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}
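
/* The frame hash in get_RCEC is a simple rotate-and-xor mix:
   rotating by 19 spreads each frame's bits before the next xor, so
   stack traces that differ only in frame order hash differently.
   The same scheme, factored out as a standalone sketch: */
#if 0
static UWord example_frames_hash ( const UWord* frames, UWord n )
{
   UWord i, hash = 0;
   for (i = 0; i < n; i++) {
      hash ^= frames[i];
      hash = ROLW(hash, 19);
   }
   return hash;
}
#endif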

///////////////////////////////////////////////////////
//// Part (2):
///  A SparseWA guest-addr -> OldRef, that refers to (1)
///

/* Records an access: a thread, a context (size & writeness) and the
   number of held locks.  The size (1,2,4,8) is encoded as 00 = 1,
   01 = 2, 10 = 4, 11 = 8.
*/
typedef
   struct {
      RCEC*     rcec;
      WordSetID locksHeldW;
      UInt      thrid  : SCALARTS_N_THRBITS;
      UInt      szLg2B : 2;
      UInt      isW    : 1;
   }
   Thr_n_RCEC;

#define N_OLDREF_ACCS 5

typedef
   struct OldRef {
      struct OldRef *prev; // to refs older than this one
      struct OldRef *next; // to refs newer than this one
      Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
      /* unused slots in this array have .thrid == 0, which is invalid */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;
/* We need ga in OldRef in order to remove OldRef from the sparsewa
   by key (i.e. ga) when re-using the lru OldRef. */

//////////// BEGIN OldRef pool allocator
static PoolAlloc* oldref_pool_allocator;
// Note: We only allocate elements in this pool allocator, we never free them.
// We stop allocating elements at VG_(clo_conflict_cache_size).
//////////// END OldRef pool allocator

static OldRef mru;
static OldRef lru;
// A doubly linked list, chaining all OldRefs in mru/lru order.
// mru/lru are sentinel nodes.
// Whenever an oldref is re-used, its position is changed to be the most
// recently used (i.e. pointed to by mru.prev).
// When a new oldref is needed, it is allocated from the pool
// if we have not yet reached --conflict-cache-size.
// Otherwise, if all oldrefs have already been allocated,
// the least recently used (i.e. pointed to by lru.next) is re-used.
// When an OldRef is used, it is moved to be the most recently used entry
// (i.e. pointed to by mru.prev).

// Removes r from the doubly linked list.
// Note: we do not need to test for special cases such as
// NULL next or prev pointers, because we have sentinel nodes
// at both sides of the list. So, a node is always forward and
// backward linked.
static inline void OldRef_unchain(OldRef *r)
{
   r->next->prev = r->prev;
   r->prev->next = r->next;
}

// Insert new as the newest OldRef.
// Similarly to OldRef_unchain, no need to test for NULL
// pointers, as e.g. mru.prev is always guaranteed to point
// to a non NULL node (lru when the list is empty).
static inline void OldRef_newest(OldRef *new)
{
   new->next = &mru;
   new->prev = mru.prev;
   mru.prev = new;
   new->prev->next = new;
}
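
/* For the sentinel scheme above to work, mru and lru must start out
   linked to each other, so the list is never structurally empty and
   no NULL checks are needed.  A sketch of that starting state and of
   the eviction step -- the real initialisation lives elsewhere in
   this file; this is only to make the invariant concrete: */
#if 0
static void example_sentinel_list ( void )
{
   /* empty list: lru <-> mru, outer pointers unused */
   mru.prev = &lru;  mru.next = NULL;
   lru.next = &mru;  lru.prev = NULL;
   /* the least recently used element, if any, is lru.next */
   OldRef* victim = lru.next;
   if (victim != &mru) {          /* list non-empty */
      OldRef_unchain(victim);
      OldRef_newest(victim);      /* recycle it as the newest entry */
   }
}
#endif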
sewardjd86e3a22008-12-03 11:39:37 +00004328
sewardjbc307e52008-12-06 22:10:54 +00004329static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
sewardjbc307e52008-12-06 22:10:54 +00004330static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
philippecabdbb52015-04-20 21:33:16 +00004331/* Note: the nr of ref in the oldrefTree will always be equal to
4332 the nr of elements that were allocated from the OldRef pool allocator
4333 as we never free an OldRef : we just re-use them. */
4334
4335
4336/* allocates a new OldRef or re-use the lru one if all allowed OldRef
4337 have already been allocated. */
4338static OldRef* alloc_or_reuse_OldRef ( void )
4339{
4340 if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
4341 oldrefTreeN++;
4342 return VG_(allocEltPA) ( oldref_pool_allocator );
4343 } else {
4344 Bool b;
4345 UWord valW;
4346 OldRef *oldref = lru.next;
4347
4348 OldRef_unchain(oldref);
4349 b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
4350 tl_assert(b);
4351 tl_assert (oldref == (OldRef*)valW);
4352
4353 for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
4354 ThrID aThrID = oldref->accs[i].thrid;
4355 RCEC* aRef = oldref->accs[i].rcec;
4356 if (aRef) {
4357 tl_assert(aThrID != 0);
4358 stats__ctxt_rcdec3++;
4359 ctxt__rcdec( aRef );
4360 } else {
4361 tl_assert(aThrID == 0);
4362 }
4363 }
4364 return oldref;
4365 }
4366}
4367
sewardjf98e1c02008-10-25 16:22:41 +00004368
sewardj1669cc72008-12-13 01:20:21 +00004369inline static UInt min_UInt ( UInt a, UInt b ) {
4370 return a < b ? a : b;
4371}
4372
sewardja781be62008-12-08 00:12:28 +00004373/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4374 first interval is lower, 1 if the first interval is higher, and 0
4375 if there is any overlap. Redundant paranoia with casting is there
4376 following what looked distinctly like a bug in gcc-4.1.2, in which
4377 some of the comparisons were done signedly instead of
4378 unsignedly. */
4379/* Copied from exp-ptrcheck/sg_main.c */
4380static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4381 Addr a2, SizeT n2 ) {
4382 UWord a1w = (UWord)a1;
4383 UWord n1w = (UWord)n1;
4384 UWord a2w = (UWord)a2;
4385 UWord n2w = (UWord)n2;
4386 tl_assert(n1w > 0 && n2w > 0);
4387 if (a1w + n1w <= a2w) return -1L;
4388 if (a2w + n2w <= a1w) return 1L;
4389 return 0;
4390}
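
/* Worked example of the interval comparator: [0x1000,0x1004) vs
   [0x1004,0x1008) do not overlap (returns -1 / 1 depending on
   order), while [0x1000,0x1008) vs [0x1004,0x1006) do (returns 0): */
#if 0
static void example_interval_cmp ( void )
{
   tl_assert(cmp_nonempty_intervals(0x1000, 4, 0x1004, 4) == -1);
   tl_assert(cmp_nonempty_intervals(0x1004, 4, 0x1000, 4) ==  1);
   tl_assert(cmp_nonempty_intervals(0x1000, 8, 0x1004, 2) ==  0);
}
#endif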
4391
sewardjc5ea9962008-12-07 01:41:46 +00004392static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004393{
sewardjd86e3a22008-12-03 11:39:37 +00004394 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004395 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004396 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004397 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004398 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004399
sewardjffce8152011-06-24 10:09:41 +00004400 tl_assert(thr);
4401 ThrID thrid = thr->thrid;
4402 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4403
4404 WordSetID locksHeldW = thr->hgthread->locksetW;
4405
sewardjc5ea9962008-12-07 01:41:46 +00004406 rcec = get_RCEC( thr );
4407 ctxt__rcinc(rcec);
4408
sewardjffce8152011-06-24 10:09:41 +00004409 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004410 switch (szB) {
4411 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004412 case 1: szLg2B = 0; break;
4413 case 2: szLg2B = 1; break;
4414 case 4: szLg2B = 2; break;
4415 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004416 default: tl_assert(0);
4417 }
4418
sewardjffce8152011-06-24 10:09:41 +00004419 /* Look in the map to see if we already have a record for this
4420 address. */
philippe40648e22015-04-11 11:42:22 +00004421 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004422
sewardjd86e3a22008-12-03 11:39:37 +00004423 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004424
4425 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004426 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004427 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004428 ref = (OldRef*)valW;
philippecabdbb52015-04-20 21:33:16 +00004429
4430 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004431
sewardjf98e1c02008-10-25 16:22:41 +00004432 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004433 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004434 continue;
sewardjffce8152011-06-24 10:09:41 +00004435 if (ref->accs[i].szLg2B != szLg2B)
4436 continue;
4437 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004438 continue;
4439 /* else we have a match, so stop looking. */
4440 break;
sewardjf98e1c02008-10-25 16:22:41 +00004441 }
4442
4443 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004444 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004445 if (i > 0) {
4446 Thr_n_RCEC tmp = ref->accs[i-1];
4447 ref->accs[i-1] = ref->accs[i];
4448 ref->accs[i] = tmp;
4449 i--;
4450 }
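         /* e.g. accs == [A,B,C,...] with a match at i == 2 becomes
            [A,C,B,...] with i == 1: a one-step move towards the front,
            so entries matched repeatedly bubble up the array.
            (Worked example added.) */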
sewardjc5ea9962008-12-07 01:41:46 +00004451 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004452 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004453 ctxt__rcdec( ref->accs[i].rcec );
4454 tl_assert(ref->accs[i].thrid == thrid);
4455 /* Update the RCEC and the W-held lockset. */
4456 ref->accs[i].rcec = rcec;
4457 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004458 } else {
sewardjffce8152011-06-24 10:09:41 +00004459 /* No entry for this (thread, R/W, size, nWHeld) quad.
4460 Shuffle all of them down one slot, and put the new entry
4461 at the start of the array. */
4462 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004463 /* the last slot is in use. We must dec the rc on the
4464 associated rcec. */
4465 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4466 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004467 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4468 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004469 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004470 } else {
4471 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4472 }
4473 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4474 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004475 ref->accs[0].thrid = thrid;
4476 ref->accs[0].szLg2B = szLg2B;
4477 ref->accs[0].isW = (UInt)(isW & 1);
4478 ref->accs[0].locksHeldW = locksHeldW;
4479 ref->accs[0].rcec = rcec;
4480 /* thrid==0 is used to signify an empty slot, so we can't
4481 add zero thrid (such a ThrID is invalid anyway). */
4482 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004483 }
4484
philippecabdbb52015-04-20 21:33:16 +00004485 OldRef_unchain(ref);
4486 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004487
4488 } else {
4489
4490 /* We don't have a record for this address. Create a new one. */
philippecabdbb52015-04-20 21:33:16 +00004491 ref = alloc_or_reuse_OldRef();
4492 ref->ga = a;
sewardjffce8152011-06-24 10:09:41 +00004493 ref->accs[0].thrid = thrid;
4494 ref->accs[0].szLg2B = szLg2B;
4495 ref->accs[0].isW = (UInt)(isW & 1);
4496 ref->accs[0].locksHeldW = locksHeldW;
4497 ref->accs[0].rcec = rcec;
4498
4499 /* thrid==0 is used to signify an empty slot, so we can't
4500 add zero thrid (such a ThrID is invalid anyway). */
4501 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4502
4503 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004504 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004505 ref->accs[j].rcec = NULL;
4506 ref->accs[j].thrid = 0;
4507 ref->accs[j].szLg2B = 0;
4508 ref->accs[j].isW = 0;
4509 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004510 }
sewardjbc307e52008-12-06 22:10:54 +00004511 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
philippecabdbb52015-04-20 21:33:16 +00004512 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004513 }
4514}
4515
4516
sewardjffce8152011-06-24 10:09:41 +00004517/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004518Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004519 /*OUT*/Thr** resThr,
4520 /*OUT*/SizeT* resSzB,
4521 /*OUT*/Bool* resIsW,
4522 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004523 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004524{
sewardja781be62008-12-08 00:12:28 +00004525 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004526 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004527 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004528 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004529
sewardjffce8152011-06-24 10:09:41 +00004530 ThrID cand_thrid;
4531 RCEC* cand_rcec;
4532 Bool cand_isW;
4533 SizeT cand_szB;
4534 WordSetID cand_locksHeldW;
4535 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004536
4537 Addr toCheck[15];
4538 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004539
4540 tl_assert(thr);
4541 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004542
sewardjffce8152011-06-24 10:09:41 +00004543 ThrID thrid = thr->thrid;
4544
sewardja781be62008-12-08 00:12:28 +00004545 toCheck[nToCheck++] = a;
4546 for (i = -7; i < (Word)szB; i++) {
4547 if (i != 0)
4548 toCheck[nToCheck++] = a + i;
4549 }
4550 tl_assert(nToCheck <= 15);
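   /* e.g. for szB == 4 this collects 'a' itself plus a-7 .. a-1 and
      a+1 .. a+3, giving nToCheck == 11; szB == 8 fills all 15 slots.
      (Worked example added.) */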
4551
4552 /* Now see if we can find a suitable matching event for
4553 any of the addresses in toCheck[0 .. nToCheck-1]. */
4554 for (j = 0; j < nToCheck; j++) {
4555
4556 cand_a = toCheck[j];
4557 // VG_(printf)("test %ld %p\n", j, cand_a);
4558
philippe40648e22015-04-11 11:42:22 +00004559 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004560 if (!b)
4561 continue;
4562
sewardjd86e3a22008-12-03 11:39:37 +00004563 ref = (OldRef*)valW;
sewardjffce8152011-06-24 10:09:41 +00004564 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004565
sewardjffce8152011-06-24 10:09:41 +00004566 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4567 cand_rcec = NULL;
4568 cand_isW = False;
4569 cand_szB = 0;
4570 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004571
sewardjc5ea9962008-12-07 01:41:46 +00004572 for (i = 0; i < N_OLDREF_ACCS; i++) {
4573 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004574 cand_rcec = cand->rcec;
4575 cand_thrid = cand->thrid;
4576 cand_isW = (Bool)cand->isW;
4577 cand_szB = 1 << cand->szLg2B;
4578 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004579
sewardjffce8152011-06-24 10:09:41 +00004580 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004581 /* This slot isn't in use. Ignore it. */
4582 continue;
4583
sewardjffce8152011-06-24 10:09:41 +00004584 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004585 /* This is an access by the same thread, but we're only
4586 interested in accesses from other threads. Ignore. */
4587 continue;
4588
4589 if ((!cand_isW) && (!isW))
4590 /* We don't want to report a read racing against another
4591 read; that's stupid. So in this case move on. */
4592 continue;
4593
sewardja781be62008-12-08 00:12:28 +00004594 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4595 /* No overlap with the access we're asking about. Ignore. */
4596 continue;
4597
sewardjc5ea9962008-12-07 01:41:46 +00004598 /* We have a match. Stop searching. */
4599 break;
4600 }
4601
4602 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4603
sewardja781be62008-12-08 00:12:28 +00004604 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004605 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004606 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004607 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004608 tl_assert(cand_rcec);
4609 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4610 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004611 /* Count how many non-zero frames we have. */
4612 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4613 for (n = 0; n < maxNFrames; n++) {
4614 if (0 == cand_rcec->frames[n]) break;
4615 }
sewardjffce8152011-06-24 10:09:41 +00004616 *resEC = VG_(make_ExeContext_from_StackTrace)
4617 (cand_rcec->frames, n);
4618 *resThr = Thr__from_ThrID(cand_thrid);
4619 *resSzB = cand_szB;
4620 *resIsW = cand_isW;
4621 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004622 return True;
4623 }
sewardjc5ea9962008-12-07 01:41:46 +00004624
sewardja781be62008-12-08 00:12:28 +00004625 /* consider next address in toCheck[] */
4626 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004627
sewardja781be62008-12-08 00:12:28 +00004628 /* really didn't find anything. */
4629 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004630}
4631
4632static void event_map_init ( void )
4633{
4634 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004635
philippe6643e962012-01-17 21:16:30 +00004636 /* Context (RCEC) pool allocator */
4637 rcec_pool_allocator = VG_(newPA) (
4638 sizeof(RCEC),
4639 1000 /* RCECs per pool */,
4640 HG_(zalloc),
4641 "libhb.event_map_init.1 (RCEC pools)",
4642 HG_(free)
4643 );
sewardjd86e3a22008-12-03 11:39:37 +00004644
4645 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004646 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004647 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004648 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004649 for (i = 0; i < N_RCEC_TAB; i++)
4650 contextTab[i] = NULL;
4651
philippe6643e962012-01-17 21:16:30 +00004652 /* Oldref pool allocator */
4653 oldref_pool_allocator = VG_(newPA)(
4654 sizeof(OldRef),
4655 1000 /* OldRefs per pool */,
4656 HG_(zalloc),
4657 "libhb.event_map_init.3 (OldRef pools)",
4658 HG_(free)
4659 );
sewardjd86e3a22008-12-03 11:39:37 +00004660
sewardjd86e3a22008-12-03 11:39:37 +00004661 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004662 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004663 oldrefTree = VG_(newSWA)(
4664 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004665 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004666 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004667 );
sewardjf98e1c02008-10-25 16:22:41 +00004668
sewardjf98e1c02008-10-25 16:22:41 +00004669 oldrefTreeN = 0;
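   /* (Comment added.)  'mru' and 'lru' are the two sentinels of the
      doubly-linked recency list: lru.next is the least-recently-used
      live OldRef (the reuse candidate) and mru.prev the most-recently-
      used one. */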
philippecabdbb52015-04-20 21:33:16 +00004670 mru.prev = &lru;
4671 mru.next = NULL;
4672 lru.prev = NULL;
4673 lru.next = &mru;
4674 for (i = 0; i < N_OLDREF_ACCS; i++) {
4675 mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
4676 .locksHeldW = 0,
4677 .thrid = 0,
4678 .szLg2B = 0,
4679 .isW = 0};
4680 lru.accs[i] = mru.accs[i];
4681 }
sewardjf98e1c02008-10-25 16:22:41 +00004682}
4683
philippecabdbb52015-04-20 21:33:16 +00004684static void event_map__check_reference_counts ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004685{
4686 RCEC* rcec;
4687 OldRef* oldref;
4688 Word i;
4689 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004690 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004691
4692 /* Set the 'check' reference counts to zero. Also, optionally
4693 check that the real reference counts are non-zero. We allow
4694 these to fall to zero before a GC, but the GC must get rid of
4695 all those that are zero, hence none should be zero after a
4696 GC. */
4697 for (i = 0; i < N_RCEC_TAB; i++) {
4698 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4699 nEnts++;
4700 tl_assert(rcec);
4701 tl_assert(rcec->magic == RCEC_MAGIC);
sewardjf98e1c02008-10-25 16:22:41 +00004702 rcec->rcX = 0;
4703 }
4704 }
4705
4706 /* check that the stats are sane */
4707 tl_assert(nEnts == stats__ctxt_tab_curr);
4708 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4709
4710 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004711 VG_(initIterSWA)( oldrefTree );
4712 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004713 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004714 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004715 ThrID aThrID = oldref->accs[i].thrid;
4716 RCEC* aRef = oldref->accs[i].rcec;
4717 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004718 tl_assert(aRef);
4719 tl_assert(aRef->magic == RCEC_MAGIC);
4720 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004721 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004722 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004723 }
4724 }
4725 }
4726
4727 /* compare check ref counts with actual */
4728 for (i = 0; i < N_RCEC_TAB; i++) {
4729 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4730 tl_assert(rcec->rc == rcec->rcX);
4731 }
4732 }
4733}
4734
sewardj8fd92d32008-11-20 23:17:01 +00004735__attribute__((noinline))
philippecabdbb52015-04-20 21:33:16 +00004736static void do_RCEC_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004737{
philippecabdbb52015-04-20 21:33:16 +00004738 UInt i;
sewardjf98e1c02008-10-25 16:22:41 +00004739
philippecabdbb52015-04-20 21:33:16 +00004740 if (VG_(clo_stats)) {
4741 static UInt ctr = 1;
4742 VG_(message)(Vg_DebugMsg,
4743 "libhb: RCEC GC: #%u %lu slots,"
4744 " %lu cur ents(ref'd %lu),"
4745 " %lu max ents\n",
4746 ctr++,
4747 (UWord)N_RCEC_TAB,
4748 stats__ctxt_tab_curr, RCEC_referenced,
4749 stats__ctxt_tab_max );
sewardjf98e1c02008-10-25 16:22:41 +00004750 }
philippecabdbb52015-04-20 21:33:16 +00004751 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004752
4753 /* Throw away all RCECs with zero reference counts */
4754 for (i = 0; i < N_RCEC_TAB; i++) {
4755 RCEC** pp = &contextTab[i];
4756 RCEC* p = *pp;
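      /* (Comment added.)  'pp' always points at the link that points
         at 'p', so unlinking 'p' is simply "*pp = p->next". */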
4757 while (p) {
4758 if (p->rc == 0) {
4759 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004760 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004761 p = *pp;
4762 tl_assert(stats__ctxt_tab_curr > 0);
philippe06bc23a2015-04-17 21:19:43 +00004763 stats__ctxt_rcdec_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004764 stats__ctxt_tab_curr--;
4765 } else {
4766 pp = &p->next;
4767 p = p->next;
4768 }
4769 }
4770 }
4771
philippecabdbb52015-04-20 21:33:16 +00004772 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004773}
4774
sewardjf98e1c02008-10-25 16:22:41 +00004775/////////////////////////////////////////////////////////
4776// //
4777// Core MSM //
4778// //
4779/////////////////////////////////////////////////////////
4780
sewardj23f12002009-07-24 08:45:08 +00004781/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4782 Nov 08, and again after [...],
4783 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004784
sewardj23f12002009-07-24 08:45:08 +00004785static ULong stats__msmcread = 0;
4786static ULong stats__msmcread_change = 0;
4787static ULong stats__msmcwrite = 0;
4788static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004789
sewardj8ab2c132009-08-02 09:34:35 +00004790/* Some notes on the H1 history mechanism:
4791
4792 Transition rules are:
4793
4794 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4795 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4796
4797 After any access by a thread T to a location L, L's constraint pair
4798 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4799
4800 After a race by thread T conflicting with some previous access by
4801 some other thread U, for a location with constraint (before
4802 processing the later access) (Cr,Cw), then Cw[U] is the segment in
4803 which the previously access lies.
4804   which the previous access lies.
4805 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4806 are compared so as to find out which thread(s) this access
4807 conflicts with. Once that is established, we also require the
4808 pre-update Cw for the location, so we can index into it for those
4809 threads, to get the scalar clock values for the point at which the
4810 former accesses were made. (In fact we only bother to do any of
4811 this for an arbitrarily chosen one of the conflicting threads, as
4812 that's simpler, it avoids flooding the user with vast amounts of
4813 mostly useless information, and because the program is wrong if it
4814 contains any races at all -- so we don't really need to show all
4815   conflicting access pairs initially, so long as we show none only if
4816 none exist).
4817
4818 ---
4819
4820 That requires the auxiliary proof that
4821
4822 (Cr `join` Kw)[T] == Kw[T]
4823
4824 Why should that be true? Because for any thread T, Kw[T] >= the
4825 scalar clock value for T known by any other thread. In other
4826 words, because T's value for its own scalar clock is at least as up
4827 to date as the value for it known by any other thread (that is true
4828 for both the R- and W- scalar clocks). Hence no other thread will
4829 be able to feed in a value for that element (indirectly via a
4830 constraint) which will exceed Kw[T], and hence the join cannot
4831 cause that particular element to advance.
4832*/
4833
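/* Illustrative sketch (added; not from the original source) of the
   auxiliary property above, (Cr `join` Kw)[T] == Kw[T], on two
   hypothetical 2-entry clock vectors in which index 0 plays the role
   of T.  'join' is pointwise max, as for real VTSs (cf. VtsID__join2).
   Flip the #if to compile this in. */
#if 0
static void h1_join_property_demo ( void )
{
   ULong Kw[2] = { 5, 2 };  /* T's own W-clock: Kw[T] == 5 */
   ULong Cr[2] = { 3, 7 };  /* a constraint: Cr[T] <= Kw[T] always */
   ULong j0 = Cr[0] > Kw[0] ? Cr[0] : Kw[0];
   ULong j1 = Cr[1] > Kw[1] ? Cr[1] : Kw[1];
   tl_assert(j0 == Kw[0]);  /* T's element cannot be advanced ... */
   tl_assert(j1 == 7);      /* ... though other elements can be */
}
#endif
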
sewardjf98e1c02008-10-25 16:22:41 +00004834__attribute__((noinline))
4835static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004836 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004837 VtsID Cfailed,
4838 VtsID Kfailed,
4839 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004840{
sewardjc5ea9962008-12-07 01:41:46 +00004841 /* Call here to report a race. We just hand it onwards to
4842 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004843 error is going to be collected, then, at history_level 2, that
4844 queries the conflicting-event map. The alternative would be to
4845 query it right here. But that causes a lot of pointless queries
4846 for errors which will shortly be discarded as duplicates, and
4847 can become a performance overhead; so we defer the query until
4848 we know the error is not a duplicate. */
4849
4850 /* Stacks for the bounds of the (or one of the) conflicting
4851 segment(s). These are only set at history_level 1. */
4852 ExeContext* hist1_seg_start = NULL;
4853 ExeContext* hist1_seg_end = NULL;
4854 Thread* hist1_conf_thr = NULL;
4855
4856 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00004857 tl_assert(acc_thr->hgthread);
4858 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00004859 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4860
4861 if (HG_(clo_history_level) == 1) {
4862 Bool found;
4863 Word firstIx, lastIx;
4864 ULong_n_EC key;
4865
4866 /* At history_level 1, we must round up the relevant stack-pair
4867 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004868 deferring it is complex; we can't (easily) put Kfailed and
4869 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004870 getting tied up in difficulties with VtsID reference
4871 counting. So just do it now. */
4872 Thr* confThr;
4873 ULong confTym = 0;
4874 /* Which thread are we in conflict with? There may be more than
4875 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4876 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004877 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004878   /* This must exist, since if it were NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004879 conflict (semantics of return value of
4880 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4881 called us, just checked exactly this -- that there was in
4882 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004883 tl_assert(confThr);
4884
4885 /* Get the scalar clock value that the conflicting thread
4886 introduced into the constraint. A careful examination of the
4887 base machine rules shows that this must be the same as the
4888 conflicting thread's scalar clock when it created this
4889 constraint. Hence we know the scalar clock of the
4890 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004891 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004892
4893 /* Using this scalar clock, index into the conflicting thread's
4894 collection of stack traces made each time its vector clock
4895 (hence its scalar clock) changed. This gives the stack
4896 traces at the start and end of the conflicting segment (well,
4897 as per comment just above, of one of the conflicting
4898 segments, if there are more than one). */
4899 key.ull = confTym;
4900 key.ec = NULL;
4901 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004902 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004903 firstIx = lastIx = 0;
4904 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004905 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004906 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00004907 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00004908 );
sewardj8ab2c132009-08-02 09:34:35 +00004909 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004910 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004911 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004912 confThr, confTym, found, firstIx, lastIx);
4913 /* We can't indefinitely collect stack traces at VTS
4914 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004915 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004916 ones, which in turn means we might fail to find index value
4917 confTym in the array. */
4918 if (found) {
4919 ULong_n_EC *pair_start, *pair_end;
4920 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004921 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004922 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004923 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004924 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004925 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004926 lastIx+1 );
4927 /* from properties of VG_(lookupXA) and the comparison fn used: */
4928 tl_assert(pair_start->ull < pair_end->ull);
4929 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004930 /* Could do a bit better here. It may be that pair_end
4931 doesn't have a stack, but the following entries in the
4932   array have the same scalar Kw and do have a stack. So
4933 we should search a bit further along the array than
4934 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004935 } else {
sewardjffce8152011-06-24 10:09:41 +00004936 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00004937 hist1_seg_end = main_get_EC( confThr );
4938 }
4939 // seg_start could be NULL iff this is the first stack in the thread
4940 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4941 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00004942 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00004943 }
4944 }
4945
sewardj60626642011-03-10 15:14:37 +00004946 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004947 szB, isWrite,
4948 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004949}
4950
4951static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004952 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004953 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004954 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4955 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004956}
4957
4958
4959/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004960static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004961 /* The following are only needed for
4962 creating error reports. */
4963 Thr* acc_thr,
4964 Addr acc_addr, SizeT szB )
4965{
4966 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004967 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004968
4969 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004970 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004971 tl_assert(is_sane_SVal_C(svOld));
4972 }
4973
sewardj1c0ce7a2009-07-01 08:10:49 +00004974 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004975 VtsID tviR = acc_thr->viR;
4976 VtsID tviW = acc_thr->viW;
4977 VtsID rmini = SVal__unC_Rmin(svOld);
4978 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004979 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4980 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004981 /* no race */
4982 /* Note: RWLOCK subtlety: use tviW, not tviR */
4983 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4984 goto out;
4985 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004986 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004987 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4988 tl_assert(leqxx);
4989 // same as in non-race case
4990 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4991 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004992 rmini, /* Cfailed */
4993 tviR, /* Kfailed */
4994 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004995 goto out;
4996 }
4997 }
4998 if (SVal__isA(svOld)) {
4999 /* reading no-access memory (sigh); leave unchanged */
5000 /* check for no pollution */
5001 tl_assert(svOld == SVal_NOACCESS);
5002 svNew = SVal_NOACCESS;
5003 goto out;
5004 }
sewardj23f12002009-07-24 08:45:08 +00005005 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005006 tl_assert(0);
5007
5008 out:
sewardj8f5374e2008-12-07 11:40:17 +00005009 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005010 tl_assert(is_sane_SVal_C(svNew));
5011 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005012 if (UNLIKELY(svNew != svOld)) {
5013 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005014 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005015 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005016 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005017 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005018 }
5019 }
5020 return svNew;
5021}
5022
5023
5024/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005025static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005026 /* The following are only needed for
5027 creating error reports. */
5028 Thr* acc_thr,
5029 Addr acc_addr, SizeT szB )
5030{
5031 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005032 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005033
5034 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005035 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005036 tl_assert(is_sane_SVal_C(svOld));
5037 }
5038
sewardj1c0ce7a2009-07-01 08:10:49 +00005039 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005040 VtsID tviW = acc_thr->viW;
5041 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005042 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5043 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005044 /* no race */
5045 svNew = SVal__mkC( tviW, tviW );
5046 goto out;
5047 } else {
5048 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005049 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005050 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5051 tl_assert(leqxx);
5052 // same as in non-race case
5053 // proof: in the non-race case, we have
5054 // rmini <= wmini (invar on constraints)
5055 // tviW <= tviR (invar on thread clocks)
5056 // wmini <= tviW (from run-time check)
5057 // hence from transitivity of <= we have
5058 // rmini <= wmini <= tviW
5059 // and so join(rmini,tviW) == tviW
5060 // and join(wmini,tviW) == tviW
5061 // qed.
5062 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5063 VtsID__join2(wmini, tviW) );
5064 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005065 wmini, /* Cfailed */
5066 tviW, /* Kfailed */
5067 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005068 goto out;
5069 }
5070 }
5071 if (SVal__isA(svOld)) {
5072 /* writing no-access memory (sigh); leave unchanged */
5073 /* check for no pollution */
5074 tl_assert(svOld == SVal_NOACCESS);
5075 svNew = SVal_NOACCESS;
5076 goto out;
5077 }
sewardj23f12002009-07-24 08:45:08 +00005078 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005079 tl_assert(0);
5080
5081 out:
sewardj8f5374e2008-12-07 11:40:17 +00005082 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005083 tl_assert(is_sane_SVal_C(svNew));
5084 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005085 if (UNLIKELY(svNew != svOld)) {
5086 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005087 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005088 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005089 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005090 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005091 }
5092 }
5093 return svNew;
5094}
5095
5096
5097/////////////////////////////////////////////////////////
5098// //
5099// Apply core MSM to specific memory locations //
5100// //
5101/////////////////////////////////////////////////////////
5102
sewardj23f12002009-07-24 08:45:08 +00005103/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005104
sewardj23f12002009-07-24 08:45:08 +00005105static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005106 CacheLine* cl;
5107 UWord cloff, tno, toff;
5108 SVal svOld, svNew;
5109 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005110 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005111 cl = get_cacheline(a);
5112 cloff = get_cacheline_offset(a);
5113 tno = get_treeno(a);
5114 toff = get_tree_offset(a); /* == 0 .. 7 */
5115 descr = cl->descrs[tno];
5116 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5117 SVal* tree = &cl->svals[tno << 3];
5118 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005119 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005120 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5121 }
5122 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005123 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005124 if (CHECK_ZSM)
5125 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005126 cl->svals[cloff] = svNew;
5127}
5128
sewardj23f12002009-07-24 08:45:08 +00005129static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005130 CacheLine* cl;
5131 UWord cloff, tno, toff;
5132 SVal svOld, svNew;
5133 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005134 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005135 cl = get_cacheline(a);
5136 cloff = get_cacheline_offset(a);
5137 tno = get_treeno(a);
5138 toff = get_tree_offset(a); /* == 0 .. 7 */
5139 descr = cl->descrs[tno];
5140 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5141 SVal* tree = &cl->svals[tno << 3];
5142 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005143 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005144 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5145 }
5146 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005147 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005148 if (CHECK_ZSM)
5149 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005150 cl->svals[cloff] = svNew;
5151}
5152
sewardj23f12002009-07-24 08:45:08 +00005153/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005154
sewardj23f12002009-07-24 08:45:08 +00005155static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005156 CacheLine* cl;
5157 UWord cloff, tno, toff;
5158 SVal svOld, svNew;
5159 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005160 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005161 if (UNLIKELY(!aligned16(a))) goto slowcase;
5162 cl = get_cacheline(a);
5163 cloff = get_cacheline_offset(a);
5164 tno = get_treeno(a);
5165 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5166 descr = cl->descrs[tno];
5167 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5168 if (valid_value_is_below_me_16(descr, toff)) {
5169 goto slowcase;
5170 } else {
5171 SVal* tree = &cl->svals[tno << 3];
5172 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5173 }
sewardj8f5374e2008-12-07 11:40:17 +00005174 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005175 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5176 }
5177 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005178 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005179 if (CHECK_ZSM)
5180 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005181 cl->svals[cloff] = svNew;
5182 return;
5183 slowcase: /* misaligned, or must go further down the tree */
5184 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005185 zsm_sapply08__msmcread( thr, a + 0 );
5186 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005187}
5188
sewardj23f12002009-07-24 08:45:08 +00005189static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005190 CacheLine* cl;
5191 UWord cloff, tno, toff;
5192 SVal svOld, svNew;
5193 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005194 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005195 if (UNLIKELY(!aligned16(a))) goto slowcase;
5196 cl = get_cacheline(a);
5197 cloff = get_cacheline_offset(a);
5198 tno = get_treeno(a);
5199 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5200 descr = cl->descrs[tno];
5201 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5202 if (valid_value_is_below_me_16(descr, toff)) {
5203 goto slowcase;
5204 } else {
5205 SVal* tree = &cl->svals[tno << 3];
5206 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5207 }
sewardj8f5374e2008-12-07 11:40:17 +00005208 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005209 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5210 }
5211 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005212 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005213 if (CHECK_ZSM)
5214 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005215 cl->svals[cloff] = svNew;
5216 return;
5217 slowcase: /* misaligned, or must go further down the tree */
5218 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005219 zsm_sapply08__msmcwrite( thr, a + 0 );
5220 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005221}
5222
sewardj23f12002009-07-24 08:45:08 +00005223/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005224
sewardj23f12002009-07-24 08:45:08 +00005225static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005226 CacheLine* cl;
5227 UWord cloff, tno, toff;
5228 SVal svOld, svNew;
5229 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005230 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005231 if (UNLIKELY(!aligned32(a))) goto slowcase;
5232 cl = get_cacheline(a);
5233 cloff = get_cacheline_offset(a);
5234 tno = get_treeno(a);
5235 toff = get_tree_offset(a); /* == 0 or 4 */
5236 descr = cl->descrs[tno];
5237 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5238 if (valid_value_is_above_me_32(descr, toff)) {
5239 SVal* tree = &cl->svals[tno << 3];
5240 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5241 } else {
5242 goto slowcase;
5243 }
sewardj8f5374e2008-12-07 11:40:17 +00005244 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005245 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5246 }
5247 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005248 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005249 if (CHECK_ZSM)
5250 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005251 cl->svals[cloff] = svNew;
5252 return;
5253 slowcase: /* misaligned, or must go further down the tree */
5254 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005255 zsm_sapply16__msmcread( thr, a + 0 );
5256 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005257}
5258
sewardj23f12002009-07-24 08:45:08 +00005259static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005260 CacheLine* cl;
5261 UWord cloff, tno, toff;
5262 SVal svOld, svNew;
5263 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005264 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005265 if (UNLIKELY(!aligned32(a))) goto slowcase;
5266 cl = get_cacheline(a);
5267 cloff = get_cacheline_offset(a);
5268 tno = get_treeno(a);
5269 toff = get_tree_offset(a); /* == 0 or 4 */
5270 descr = cl->descrs[tno];
5271 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5272 if (valid_value_is_above_me_32(descr, toff)) {
5273 SVal* tree = &cl->svals[tno << 3];
5274 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5275 } else {
5276 goto slowcase;
5277 }
sewardj8f5374e2008-12-07 11:40:17 +00005278 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005279 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5280 }
5281 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005282 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005283 if (CHECK_ZSM)
5284 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005285 cl->svals[cloff] = svNew;
5286 return;
5287 slowcase: /* misaligned, or must go further down the tree */
5288 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005289 zsm_sapply16__msmcwrite( thr, a + 0 );
5290 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005291}
5292
sewardj23f12002009-07-24 08:45:08 +00005293/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005294
sewardj23f12002009-07-24 08:45:08 +00005295static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005296 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005297 UWord cloff, tno;
5298 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005299 SVal svOld, svNew;
5300 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005301 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005302 if (UNLIKELY(!aligned64(a))) goto slowcase;
5303 cl = get_cacheline(a);
5304 cloff = get_cacheline_offset(a);
5305 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005306 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005307 descr = cl->descrs[tno];
5308 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5309 goto slowcase;
5310 }
5311 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005312 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005313 if (CHECK_ZSM)
5314 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005315 cl->svals[cloff] = svNew;
5316 return;
5317 slowcase: /* misaligned, or must go further down the tree */
5318 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005319 zsm_sapply32__msmcread( thr, a + 0 );
5320 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005321}
5322
sewardj23f12002009-07-24 08:45:08 +00005323static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005324 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005325 UWord cloff, tno;
5326 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005327 SVal svOld, svNew;
5328 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005329 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005330 if (UNLIKELY(!aligned64(a))) goto slowcase;
5331 cl = get_cacheline(a);
5332 cloff = get_cacheline_offset(a);
5333 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005334 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005335 descr = cl->descrs[tno];
5336 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5337 goto slowcase;
5338 }
5339 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005340 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005341 if (CHECK_ZSM)
5342 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005343 cl->svals[cloff] = svNew;
5344 return;
5345 slowcase: /* misaligned, or must go further down the tree */
5346 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005347 zsm_sapply32__msmcwrite( thr, a + 0 );
5348 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005349}
5350
sewardj23f12002009-07-24 08:45:08 +00005351/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005352
5353static
sewardj23f12002009-07-24 08:45:08 +00005354void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005355 CacheLine* cl;
5356 UWord cloff, tno, toff;
5357 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005358 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005359 cl = get_cacheline(a);
5360 cloff = get_cacheline_offset(a);
5361 tno = get_treeno(a);
5362 toff = get_tree_offset(a); /* == 0 .. 7 */
5363 descr = cl->descrs[tno];
5364 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5365 SVal* tree = &cl->svals[tno << 3];
5366 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005367 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005368 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5369 }
5370 tl_assert(svNew != SVal_INVALID);
5371 cl->svals[cloff] = svNew;
5372}
5373
sewardj23f12002009-07-24 08:45:08 +00005374/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005375
5376static
sewardj23f12002009-07-24 08:45:08 +00005377void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005378 CacheLine* cl;
5379 UWord cloff, tno, toff;
5380 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005381 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005382 if (UNLIKELY(!aligned16(a))) goto slowcase;
5383 cl = get_cacheline(a);
5384 cloff = get_cacheline_offset(a);
5385 tno = get_treeno(a);
5386 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5387 descr = cl->descrs[tno];
5388 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5389 if (valid_value_is_below_me_16(descr, toff)) {
5390 /* Writing at this level. Need to fix up 'descr'. */
5391 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5392         /* At this point, the tree does not match cl->descrs[tno] any
5393 more. The assignments below will fix it up. */
5394 } else {
5395 /* We can't indiscriminately write on the w16 node as in the
5396 w64 case, as that might make the node inconsistent with
5397 its parent. So first, pull down to this level. */
5398 SVal* tree = &cl->svals[tno << 3];
5399 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005400 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005401 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5402 }
5403 }
5404 tl_assert(svNew != SVal_INVALID);
5405 cl->svals[cloff + 0] = svNew;
5406 cl->svals[cloff + 1] = SVal_INVALID;
5407 return;
5408 slowcase: /* misaligned */
5409 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005410 zsm_swrite08( a + 0, svNew );
5411 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005412}
5413
sewardj23f12002009-07-24 08:45:08 +00005414/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005415
5416static
sewardj23f12002009-07-24 08:45:08 +00005417void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005418 CacheLine* cl;
5419 UWord cloff, tno, toff;
5420 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005421 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005422 if (UNLIKELY(!aligned32(a))) goto slowcase;
5423 cl = get_cacheline(a);
5424 cloff = get_cacheline_offset(a);
5425 tno = get_treeno(a);
5426 toff = get_tree_offset(a); /* == 0 or 4 */
5427 descr = cl->descrs[tno];
5428 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5429 if (valid_value_is_above_me_32(descr, toff)) {
5430 /* We can't indiscriminately write on the w32 node as in the
5431 w64 case, as that might make the node inconsistent with
5432 its parent. So first, pull down to this level. */
5433 SVal* tree = &cl->svals[tno << 3];
5434 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005435 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005436 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5437 } else {
5438 /* Writing at this level. Need to fix up 'descr'. */
5439 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5440         /* At this point, the tree does not match cl->descrs[tno] any
5441 more. The assignments below will fix it up. */
5442 }
5443 }
5444 tl_assert(svNew != SVal_INVALID);
5445 cl->svals[cloff + 0] = svNew;
5446 cl->svals[cloff + 1] = SVal_INVALID;
5447 cl->svals[cloff + 2] = SVal_INVALID;
5448 cl->svals[cloff + 3] = SVal_INVALID;
5449 return;
5450 slowcase: /* misaligned */
5451 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005452 zsm_swrite16( a + 0, svNew );
5453 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005454}
5455
sewardj23f12002009-07-24 08:45:08 +00005456/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005457
5458static
sewardj23f12002009-07-24 08:45:08 +00005459void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005460 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005461 UWord cloff, tno;
5462 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005463 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005464 if (UNLIKELY(!aligned64(a))) goto slowcase;
5465 cl = get_cacheline(a);
5466 cloff = get_cacheline_offset(a);
5467 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005468 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005469 cl->descrs[tno] = TREE_DESCR_64;
5470 tl_assert(svNew != SVal_INVALID);
5471 cl->svals[cloff + 0] = svNew;
5472 cl->svals[cloff + 1] = SVal_INVALID;
5473 cl->svals[cloff + 2] = SVal_INVALID;
5474 cl->svals[cloff + 3] = SVal_INVALID;
5475 cl->svals[cloff + 4] = SVal_INVALID;
5476 cl->svals[cloff + 5] = SVal_INVALID;
5477 cl->svals[cloff + 6] = SVal_INVALID;
5478 cl->svals[cloff + 7] = SVal_INVALID;
5479 return;
5480 slowcase: /* misaligned */
5481 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005482 zsm_swrite32( a + 0, svNew );
5483 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005484}
5485
sewardj23f12002009-07-24 08:45:08 +00005486/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005487
5488static
sewardj23f12002009-07-24 08:45:08 +00005489SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005490 CacheLine* cl;
5491 UWord cloff, tno, toff;
5492 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005493 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005494 cl = get_cacheline(a);
5495 cloff = get_cacheline_offset(a);
5496 tno = get_treeno(a);
5497 toff = get_tree_offset(a); /* == 0 .. 7 */
5498 descr = cl->descrs[tno];
5499 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5500 SVal* tree = &cl->svals[tno << 3];
5501 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5502 }
5503 return cl->svals[cloff];
5504}
5505
sewardj23f12002009-07-24 08:45:08 +00005506static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005507 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005508 stats__cline_scopy08s++;
5509 sv = zsm_sread08( src );
5510 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005511}
5512
5513
sewardj23f12002009-07-24 08:45:08 +00005514/* Block-copy states (needed for implementing realloc()). Note this
5515 doesn't change the filtering arrangements. The caller of
5516 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005517
sewardj23f12002009-07-24 08:45:08 +00005518static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005519{
5520 SizeT i;
5521 if (len == 0)
5522 return;
5523
5524 /* assert for non-overlappingness */
5525 tl_assert(src+len <= dst || dst+len <= src);
5526
5527 /* To be simple, just copy byte by byte. But so as not to wreck
5528 performance for later accesses to dst[0 .. len-1], normalise
5529 destination lines as we finish with them, and also normalise the
5530      lines containing the first and last addresses. */
5531 for (i = 0; i < len; i++) {
5532 Bool normalise
5533 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5534 || i == 0 /* first in range */
5535 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005536 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005537 }
5538}
5539
5540
5541/* For setting address ranges to a given value. Has considerable
5542 sophistication so as to avoid generating large numbers of pointless
5543 cache loads/writebacks for large ranges. */
5544
5545/* Do small ranges in-cache, in the obvious way. */
5546static
sewardj23f12002009-07-24 08:45:08 +00005547void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005548{
5549 /* fast track a couple of common cases */
5550 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005551 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005552 return;
5553 }
5554 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005555 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005556 return;
5557 }
5558
5559 /* be completely general (but as efficient as possible) */
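   /* e.g. a == 0x1001, len == 7 does one zsm_swrite08 at 0x1001, one
      zsm_swrite16 at 0x1002 and one zsm_swrite32 at 0x1004, leaving
      len == 0.  (Worked example added.) */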
5560 if (len == 0) return;
5561
5562 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005563 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005564 a += 1;
5565 len -= 1;
5566 tl_assert(aligned16(a));
5567 }
5568 if (len == 0) return;
5569
5570 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005571 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005572 a += 2;
5573 len -= 2;
5574 tl_assert(aligned32(a));
5575 }
5576 if (len == 0) return;
5577
5578 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005579 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005580 a += 4;
5581 len -= 4;
5582 tl_assert(aligned64(a));
5583 }
5584 if (len == 0) return;
5585
5586 if (len >= 8) {
5587 tl_assert(aligned64(a));
5588 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005589 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005590 a += 8;
5591 len -= 8;
5592 }
5593 tl_assert(aligned64(a));
5594 }
5595 if (len == 0) return;
5596
5597 if (len >= 4)
5598 tl_assert(aligned32(a));
5599 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005600 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005601 a += 4;
5602 len -= 4;
5603 }
5604 if (len == 0) return;
5605
5606 if (len >= 2)
5607 tl_assert(aligned16(a));
5608 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005609 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005610 a += 2;
5611 len -= 2;
5612 }
5613 if (len == 0) return;
5614
5615 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005616 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005617 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005618 len -= 1;
5619 }
5620 tl_assert(len == 0);
5621}
5622
5623
sewardj23f12002009-07-24 08:45:08 +00005624/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005625 for larger ranges, try to operate directly on the out-of-cache
5626 representation, rather than dragging lines into the cache,
5627 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005628 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005629
sewardj23f12002009-07-24 08:45:08 +00005630 Note that this doesn't change the filtering arrangements. The
5631 caller of zsm_sset_range needs to attend to that. */
5632
5633static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005634{
5635 tl_assert(svNew != SVal_INVALID);
5636 stats__cache_make_New_arange += (ULong)len;
5637
5638 if (0 && len > 500)
5639      VG_(printf)("make New ( %#lx, %lu )\n", a, len );
5640
5641 if (0) {
5642 static UWord n_New_in_cache = 0;
5643 static UWord n_New_not_in_cache = 0;
5644 /* tag is 'a' with the in-line offset masked out,
5645 eg a[31]..a[4] 0000 */
5646 Addr tag = a & ~(N_LINE_ARANGE - 1);
5647 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5648 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5649 n_New_in_cache++;
5650 } else {
5651 n_New_not_in_cache++;
5652 }
5653 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5654 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5655 n_New_in_cache, n_New_not_in_cache );
5656 }
5657
5658 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005659 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005660 } else {
5661 Addr before_start = a;
5662 Addr aligned_start = cacheline_ROUNDUP(a);
5663 Addr after_start = cacheline_ROUNDDN(a + len);
5664 UWord before_len = aligned_start - before_start;
5665 UWord aligned_len = after_start - aligned_start;
5666 UWord after_len = a + len - after_start;
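      /* e.g. assuming N_LINE_ARANGE == 64, a == 0x1003 and len == 0x100
         give before_start 0x1003, aligned_start 0x1040 and after_start
         0x1100, hence before_len 0x3D, aligned_len 0xC0 and after_len 3.
         (Worked example added.) */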
5667 tl_assert(before_start <= aligned_start);
5668 tl_assert(aligned_start <= after_start);
5669 tl_assert(before_len < N_LINE_ARANGE);
5670 tl_assert(after_len < N_LINE_ARANGE);
5671 tl_assert(get_cacheline_offset(aligned_start) == 0);
5672 if (get_cacheline_offset(a) == 0) {
5673 tl_assert(before_len == 0);
5674 tl_assert(a == aligned_start);
5675 }
5676 if (get_cacheline_offset(a+len) == 0) {
5677 tl_assert(after_len == 0);
5678 tl_assert(after_start == a+len);
5679 }
5680 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005681 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005682 }
5683 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005684 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005685 }
5686 stats__cache_make_New_inZrep += (ULong)aligned_len;
5687
5688 while (1) {
5689 Addr tag;
5690 UWord wix;
5691 if (aligned_start >= after_start)
5692 break;
5693 tl_assert(get_cacheline_offset(aligned_start) == 0);
5694 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5695 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5696 if (tag == cache_shmem.tags0[wix]) {
5697 UWord i;
5698 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005699 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005700 } else {
5701 UWord i;
5702 Word zix;
5703 SecMap* sm;
5704 LineZ* lineZ;
5705 /* This line is not in the cache. Do not force it in; instead
5706 modify it in-place. */
5707 /* find the Z line to write in and rcdec it or the
5708 associated F line. */
5709 find_Z_for_writing( &sm, &zix, tag );
5710 tl_assert(sm);
5711 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5712 lineZ = &sm->linesZ[zix];
5713 lineZ->dict[0] = svNew;
5714 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5715 for (i = 0; i < N_LINE_ARANGE/4; i++)
5716 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5717 rcinc_LineZ(lineZ);
5718 }
5719 aligned_start += N_LINE_ARANGE;
5720 aligned_len -= N_LINE_ARANGE;
5721 }
5722 tl_assert(aligned_start == after_start);
5723 tl_assert(aligned_len == 0);
5724 }
5725}
5726
5727
5728/////////////////////////////////////////////////////////
5729// //
sewardj23f12002009-07-24 08:45:08 +00005730// Front-filtering accesses //
5731// //
5732/////////////////////////////////////////////////////////
5733
5734static UWord stats__f_ac = 0;
5735static UWord stats__f_sk = 0;
5736
5737#if 0
5738# define STATS__F_SHOW \
5739 do { \
5740 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5741 VG_(printf)("filters: ac %lu sk %lu\n", \
5742 stats__f_ac, stats__f_sk); \
5743 } while (0)
5744#else
5745# define STATS__F_SHOW /* */
5746#endif
5747
5748void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5749 stats__f_ac++;
5750 STATS__F_SHOW;
5751 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5752 stats__f_sk++;
5753 return;
5754 }
5755 zsm_sapply08__msmcwrite(thr, a);
5756}
5757
5758void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5759 stats__f_ac++;
5760 STATS__F_SHOW;
5761 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5762 stats__f_sk++;
5763 return;
5764 }
5765 zsm_sapply16__msmcwrite(thr, a);
5766}
5767
5768void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5769 stats__f_ac++;
5770 STATS__F_SHOW;
5771 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5772 stats__f_sk++;
5773 return;
5774 }
5775 zsm_sapply32__msmcwrite(thr, a);
5776}
5777
5778void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5779 stats__f_ac++;
5780 STATS__F_SHOW;
5781 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5782 stats__f_sk++;
5783 return;
5784 }
5785 zsm_sapply64__msmcwrite(thr, a);
5786}
5787
5788void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5789{
5790 /* fast track a couple of common cases */
5791 if (len == 4 && aligned32(a)) {
5792 zsm_sapply32_f__msmcwrite( thr, a );
5793 return;
5794 }
5795 if (len == 8 && aligned64(a)) {
5796 zsm_sapply64_f__msmcwrite( thr, a );
5797 return;
5798 }
5799
5800 /* be completely general (but as efficient as possible) */
5801 if (len == 0) return;
5802
5803 if (!aligned16(a) && len >= 1) {
5804 zsm_sapply08_f__msmcwrite( thr, a );
5805 a += 1;
5806 len -= 1;
5807 tl_assert(aligned16(a));
5808 }
5809 if (len == 0) return;
5810
5811 if (!aligned32(a) && len >= 2) {
5812 zsm_sapply16_f__msmcwrite( thr, a );
5813 a += 2;
5814 len -= 2;
5815 tl_assert(aligned32(a));
5816 }
5817 if (len == 0) return;
5818
5819 if (!aligned64(a) && len >= 4) {
5820 zsm_sapply32_f__msmcwrite( thr, a );
5821 a += 4;
5822 len -= 4;
5823 tl_assert(aligned64(a));
5824 }
5825 if (len == 0) return;
5826
5827 if (len >= 8) {
5828 tl_assert(aligned64(a));
5829 while (len >= 8) {
5830 zsm_sapply64_f__msmcwrite( thr, a );
5831 a += 8;
5832 len -= 8;
5833 }
5834 tl_assert(aligned64(a));
5835 }
5836 if (len == 0) return;
5837
5838 if (len >= 4)
5839 tl_assert(aligned32(a));
5840 if (len >= 4) {
5841 zsm_sapply32_f__msmcwrite( thr, a );
5842 a += 4;
5843 len -= 4;
5844 }
5845 if (len == 0) return;
5846
5847 if (len >= 2)
5848 tl_assert(aligned16(a));
5849 if (len >= 2) {
5850 zsm_sapply16_f__msmcwrite( thr, a );
5851 a += 2;
5852 len -= 2;
5853 }
5854 if (len == 0) return;
5855
5856 if (len >= 1) {
5857 zsm_sapply08_f__msmcwrite( thr, a );
5858 //a += 1;
5859 len -= 1;
5860 }
5861 tl_assert(len == 0);
5862}
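/* Example of the decomposition performed above (illustrative
   addresses): a 13-byte write starting at 0x1003 is issued as
      zsm_sapply08_f__msmcwrite( thr, 0x1003 )  -- 1 byte, reaches
                                                   4-byte alignment here
      zsm_sapply32_f__msmcwrite( thr, 0x1004 )  -- 4 bytes, reaches
                                                   8-byte alignment
      zsm_sapply64_f__msmcwrite( thr, 0x1008 )  -- 8 bytes
   totalling 13 bytes, each piece naturally aligned. */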
5863
5864void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5865 stats__f_ac++;
5866 STATS__F_SHOW;
5867 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5868 stats__f_sk++;
5869 return;
5870 }
5871 zsm_sapply08__msmcread(thr, a);
5872}
5873
5874void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5875 stats__f_ac++;
5876 STATS__F_SHOW;
5877 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5878 stats__f_sk++;
5879 return;
5880 }
5881 zsm_sapply16__msmcread(thr, a);
5882}
5883
5884void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5885 stats__f_ac++;
5886 STATS__F_SHOW;
5887 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5888 stats__f_sk++;
5889 return;
5890 }
5891 zsm_sapply32__msmcread(thr, a);
5892}
5893
5894void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5895 stats__f_ac++;
5896 STATS__F_SHOW;
5897 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5898 stats__f_sk++;
5899 return;
5900 }
5901 zsm_sapply64__msmcread(thr, a);
5902}
5903
5904void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5905{
5906 /* fast track a couple of common cases */
5907 if (len == 4 && aligned32(a)) {
5908 zsm_sapply32_f__msmcread( thr, a );
5909 return;
5910 }
5911 if (len == 8 && aligned64(a)) {
5912 zsm_sapply64_f__msmcread( thr, a );
5913 return;
5914 }
5915
5916 /* be completely general (but as efficient as possible) */
5917 if (len == 0) return;
5918
5919 if (!aligned16(a) && len >= 1) {
5920 zsm_sapply08_f__msmcread( thr, a );
5921 a += 1;
5922 len -= 1;
5923 tl_assert(aligned16(a));
5924 }
5925 if (len == 0) return;
5926
5927 if (!aligned32(a) && len >= 2) {
5928 zsm_sapply16_f__msmcread( thr, a );
5929 a += 2;
5930 len -= 2;
5931 tl_assert(aligned32(a));
5932 }
5933 if (len == 0) return;
5934
5935 if (!aligned64(a) && len >= 4) {
5936 zsm_sapply32_f__msmcread( thr, a );
5937 a += 4;
5938 len -= 4;
5939 tl_assert(aligned64(a));
5940 }
5941 if (len == 0) return;
5942
5943 if (len >= 8) {
5944 tl_assert(aligned64(a));
5945 while (len >= 8) {
5946 zsm_sapply64_f__msmcread( thr, a );
5947 a += 8;
5948 len -= 8;
5949 }
5950 tl_assert(aligned64(a));
5951 }
5952 if (len == 0) return;
5953
5954 if (len >= 4)
5955 tl_assert(aligned32(a));
5956 if (len >= 4) {
5957 zsm_sapply32_f__msmcread( thr, a );
5958 a += 4;
5959 len -= 4;
5960 }
5961 if (len == 0) return;
5962
5963 if (len >= 2)
5964 tl_assert(aligned16(a));
5965 if (len >= 2) {
5966 zsm_sapply16_f__msmcread( thr, a );
5967 a += 2;
5968 len -= 2;
5969 }
5970 if (len == 0) return;
5971
5972 if (len >= 1) {
5973 zsm_sapply08_f__msmcread( thr, a );
5974 //a += 1;
5975 len -= 1;
5976 }
5977 tl_assert(len == 0);
5978}
5979
5980void libhb_Thr_resumes ( Thr* thr )
5981{
5982 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00005983 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00005984 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00005985 Filter__clear(thr->filter, "libhb_Thr_resumes");
5986 /* A kludge, but .. if this thread doesn't have any marker stacks
5987 at all, get one right now. This is easier than figuring out
5988 exactly when at thread startup we can and can't take a stack
5989 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00005990 if (HG_(clo_history_level) == 1) {
5991 tl_assert(thr->local_Kws_n_stacks);
5992 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5993 note_local_Kw_n_stack_for(thr);
5994 }
sewardj23f12002009-07-24 08:45:08 +00005995}
5996
5997
5998/////////////////////////////////////////////////////////
5999// //
sewardjf98e1c02008-10-25 16:22:41 +00006000// Synchronisation objects //
6001// //
6002/////////////////////////////////////////////////////////
6003
sewardjffce8152011-06-24 10:09:41 +00006004/* A doubly-linked list of all the SOs. */
6005SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006006
sewardjffce8152011-06-24 10:09:41 +00006007static SO* SO__Alloc ( void )
6008{
sewardjf98e1c02008-10-25 16:22:41 +00006009 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6010 so->viR = VtsID_INVALID;
6011 so->viW = VtsID_INVALID;
6012 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006013 /* Add to the doubly-linked list */
6014 if (admin_SO) {
6015 tl_assert(admin_SO->admin_prev == NULL);
6016 admin_SO->admin_prev = so;
6017 so->admin_next = admin_SO;
6018 } else {
6019 so->admin_next = NULL;
6020 }
6021 so->admin_prev = NULL;
6022 admin_SO = so;
6023 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006024 return so;
6025}
sewardjffce8152011-06-24 10:09:41 +00006026
6027static void SO__Dealloc ( SO* so )
6028{
sewardjf98e1c02008-10-25 16:22:41 +00006029 tl_assert(so);
6030 tl_assert(so->magic == SO_MAGIC);
6031 if (so->viR == VtsID_INVALID) {
6032 tl_assert(so->viW == VtsID_INVALID);
6033 } else {
6034 tl_assert(so->viW != VtsID_INVALID);
6035 VtsID__rcdec(so->viR);
6036 VtsID__rcdec(so->viW);
6037 }
6038 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006039 /* Delete from the doubly-linked list */
6040 if (so->admin_prev)
6041 so->admin_prev->admin_next = so->admin_next;
6042 if (so->admin_next)
6043 so->admin_next->admin_prev = so->admin_prev;
6044 if (so == admin_SO)
6045 admin_SO = so->admin_next;
6046 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006047 HG_(free)( so );
6048}
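#if 0
/* Sketch, not compiled: the admin_prev/admin_next links maintained
   above make it possible to enumerate all live SOs, for example when
   dumping state. 'print_SO' is a hypothetical helper, not something
   defined in this file. */
static void example_walk_all_SOs ( void )
{
   SO* so;
   for (so = admin_SO; so; so = so->admin_next)
      print_SO( so );
}
#endif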
6049
6050
6051/////////////////////////////////////////////////////////
6052// //
6053// Top Level API //
6054// //
6055/////////////////////////////////////////////////////////
6056
florian6bd9dc12012-11-23 16:17:43 +00006057static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006058{
6059 if (1) return;
6060 if (t->viR == t->viW) {
6061 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6062 VtsID__pp( t->viR );
6063 VG_(printf)("%s","\n");
6064 } else {
6065 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6066 VtsID__pp( t->viR );
6067 VG_(printf)(" viW %u==", t->viW);
6068 VtsID__pp( t->viW );
6069 VG_(printf)("%s","\n");
6070 }
6071}
6072
6073
6074Thr* libhb_init (
6075 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006076 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006077 )
6078{
6079 Thr* thr;
6080 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006081
 6082 // We will have to store a large number of these,
6083 // so make sure they're the size we expect them to be.
6084 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006085
 6086 /* because the first 1024 ThrIDs are unusable */
6087 tl_assert(SCALARTS_N_THRBITS >= 11);
6088 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6089 Thr_n_RCEC). */
6090 tl_assert(SCALARTS_N_THRBITS <= 29);
6091
6092 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6093 (32-bit). It's not correctness-critical, but there are a lot of
6094 them, so it's important from a space viewpoint. Unfortunately
6095 we simply can't pack it into 2 words on a 32-bit target. */
6096 if (sizeof(UWord) == 8) {
6097 tl_assert(sizeof(Thr_n_RCEC) == 16);
6098 } else {
6099 tl_assert(sizeof(Thr_n_RCEC) == 12);
6100 }
6101
6102 /* Word sets really are 32 bits. Even on a 64 bit target. */
6103 tl_assert(sizeof(WordSetID) == 4);
6104 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006105
sewardjf98e1c02008-10-25 16:22:41 +00006106 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006107 tl_assert(get_EC);
6108 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006109 main_get_EC = get_EC;
6110
6111 // No need to initialise hg_wordfm.
6112 // No need to initialise hg_wordset.
6113
sewardj7aa38a92011-02-27 23:04:12 +00006114 /* Allocated once and never deallocated. Used as a temporary in
6115 VTS singleton, tick and join operations. */
6116 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6117 temp_max_sized_VTS->id = VtsID_INVALID;
philippec3508652015-03-28 12:01:58 +00006118 verydead_thread_tables_init();
sewardjf98e1c02008-10-25 16:22:41 +00006119 vts_set_init();
6120 vts_tab_init();
6121 event_map_init();
6122 VtsID__invalidate_caches();
6123
6124 // initialise shadow memory
philippe1475a7f2015-05-11 19:45:08 +00006125 zsm_init( );
sewardjf98e1c02008-10-25 16:22:41 +00006126
6127 thr = Thr__new();
6128 vi = VtsID__mk_Singleton( thr, 1 );
6129 thr->viR = vi;
6130 thr->viW = vi;
6131 VtsID__rcinc(thr->viR);
6132 VtsID__rcinc(thr->viW);
6133
6134 show_thread_state(" root", thr);
6135 return thr;
6136}
6137
sewardj23f12002009-07-24 08:45:08 +00006138
sewardjf98e1c02008-10-25 16:22:41 +00006139Thr* libhb_create ( Thr* parent )
6140{
6141 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6142 the child's index. Since the child's index is guaranteed
6143 unique, it has never been seen before, so the implicit value
6144 before the tick is zero and after that is one. */
6145 Thr* child = Thr__new();
6146
6147 child->viR = VtsID__tick( parent->viR, child );
6148 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006149 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006150 VtsID__rcinc(child->viR);
6151 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006152 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006153 early for that - it may not have a valid TId yet. So, let
6154 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006155
6156 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6157 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6158
6159 /* and the parent has to move along too */
6160 VtsID__rcdec(parent->viR);
6161 VtsID__rcdec(parent->viW);
6162 parent->viR = VtsID__tick( parent->viR, parent );
6163 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006164 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006165 VtsID__rcinc(parent->viR);
6166 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006167 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006168
6169 show_thread_state(" child", child);
6170 show_thread_state("parent", parent);
6171
6172 return child;
6173}
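#if 0
/* Usage sketch, not compiled: the expected startup sequence for a
   client of this library. 'my_get_stacktrace' and 'my_get_EC' stand
   for callbacks the client must supply; they are not defined here. */
Thr* root = libhb_init( my_get_stacktrace, my_get_EC );
/* ... and then, each time the client creates a thread: */
Thr* child = libhb_create( parent_thr );
#endif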
6174
6175/* Shut down the library, and print stats (in fact that's _all_
 6176 this is for). */
6177void libhb_shutdown ( Bool show_stats )
6178{
6179 if (show_stats) {
6180 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6181 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6182 stats__secmaps_allocd,
6183 stats__secmap_ga_space_covered);
6184 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6185 stats__secmap_linesZ_allocd,
6186 stats__secmap_linesZ_bytes);
6187 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6188 stats__secmap_linesF_allocd,
6189 stats__secmap_linesF_bytes);
philippef54cb662015-05-10 22:19:31 +00006190 VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
6191 " #%lu scanGC \n",
6192 stats__secmaps_in_map_shmem,
6193 shmem__SecMap_do_GC(False /* really do GC */),
6194 stats__secmaps_scanGC);
6195 tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
6196 VG_(printf)(" secmaps: %'10lu in freelist,"
6197 " total (scanGCed %'lu, ssetGCed %'lu)\n",
6198 SecMap_freelist_length(),
6199 stats__secmaps_scanGCed,
6200 stats__secmaps_ssetGCed);
sewardjf98e1c02008-10-25 16:22:41 +00006201 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6202 stats__secmaps_search, stats__secmaps_search_slow);
6203
6204 VG_(printf)("%s","\n");
6205 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6206 stats__cache_totrefs, stats__cache_totmisses );
6207 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6208 stats__cache_Z_fetches, stats__cache_F_fetches );
6209 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6210 stats__cache_Z_wbacks, stats__cache_F_wbacks );
philippef54cb662015-05-10 22:19:31 +00006211 VG_(printf)(" cache: %'14lu flushes_invals\n",
6212 stats__cache_flushes_invals );
sewardjf98e1c02008-10-25 16:22:41 +00006213 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6214 stats__cache_make_New_arange,
6215 stats__cache_make_New_inZrep);
6216
6217 VG_(printf)("%s","\n");
6218 VG_(printf)(" cline: %'10lu normalises\n",
6219 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006220 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6221 stats__cline_cread64s,
6222 stats__cline_cread32s,
6223 stats__cline_cread16s,
6224 stats__cline_cread08s );
6225 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6226 stats__cline_cwrite64s,
6227 stats__cline_cwrite32s,
6228 stats__cline_cwrite16s,
6229 stats__cline_cwrite08s );
6230 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6231 stats__cline_swrite64s,
6232 stats__cline_swrite32s,
6233 stats__cline_swrite16s,
6234 stats__cline_swrite08s );
6235 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6236 stats__cline_sread08s, stats__cline_scopy08s );
philippef54cb662015-05-10 22:19:31 +00006237 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
6238 " 2to1 %'12lu\n",
6239 stats__cline_64to32splits, stats__cline_32to16splits,
6240 stats__cline_16to8splits );
6241 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
6242 " 2to1 %'12lu\n",
6243 stats__cline_64to32pulldown, stats__cline_32to16pulldown,
6244 stats__cline_16to8pulldown );
sewardjf98e1c02008-10-25 16:22:41 +00006245 if (0)
philippef54cb662015-05-10 22:19:31 +00006246 VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
6247 " covers %ld bytes of arange\n",
6248 (Word)sizeof(LineZ),
6249 (Word)N_LINE_ARANGE);
sewardjf98e1c02008-10-25 16:22:41 +00006250
6251 VG_(printf)("%s","\n");
6252
sewardjc8028ad2010-05-05 09:34:42 +00006253 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006254 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006255 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006256 stats__msmcwrite, stats__msmcwrite_change);
6257 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6258 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006259 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6260 stats__join2_queries, stats__join2_misses);
6261
6262 VG_(printf)("%s","\n");
philippef54cb662015-05-10 22:19:31 +00006263 VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6264 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6265 VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6266 stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
6267 VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
6268 " (%'lu allocd)\n",
sewardj7aa38a92011-02-27 23:04:12 +00006269 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006270 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6271 stats__vts__indexat_slow );
6272
6273 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006274 VG_(printf)(
6275 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6276 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6277 );
philippef54cb662015-05-10 22:19:31 +00006278 VG_(printf)(" libhb: #%lu vts_tab GC\n", stats__vts_tab_GC);
sewardjf98e1c02008-10-25 16:22:41 +00006279 VG_(printf)( " libhb: %lu entries in vts_set\n",
6280 VG_(sizeFM)( vts_set ) );
6281
6282 VG_(printf)("%s","\n");
philippe900c5352015-03-24 14:02:44 +00006283 {
6284 UInt live = 0;
6285 UInt llexit_done = 0;
6286 UInt joinedwith_done = 0;
6287 UInt llexit_and_joinedwith_done = 0;
6288
6289 Thread* hgthread = get_admin_threads();
6290 tl_assert(hgthread);
6291 while (hgthread) {
6292 Thr* hbthr = hgthread->hbthr;
6293 tl_assert(hbthr);
6294 if (hbthr->llexit_done && hbthr->joinedwith_done)
6295 llexit_and_joinedwith_done++;
6296 else if (hbthr->llexit_done)
6297 llexit_done++;
6298 else if (hbthr->joinedwith_done)
6299 joinedwith_done++;
6300 else
6301 live++;
6302 hgthread = hgthread->admin;
6303 }
6304 VG_(printf)(" libhb: threads live: %d exit_and_joinedwith %d"
6305 " exit %d joinedwith %d\n",
6306 live, llexit_and_joinedwith_done,
6307 llexit_done, joinedwith_done);
philippec3508652015-03-28 12:01:58 +00006308 VG_(printf)(" libhb: %d verydead_threads, "
6309 "%d verydead_threads_not_pruned\n",
6310 (int) VG_(sizeXA)( verydead_thread_table),
6311 (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
6312 tl_assert (VG_(sizeXA)( verydead_thread_table)
6313 + VG_(sizeXA)( verydead_thread_table_not_pruned)
6314 == llexit_and_joinedwith_done);
philippe900c5352015-03-24 14:02:44 +00006315 }
6316
6317 VG_(printf)("%s","\n");
philipped005b2c2015-04-21 21:58:14 +00006318 {
6319 UWord OldRef_accs_n[N_OLDREF_ACCS+1];
6320 UInt accs_n;
6321 UWord OldRef_n;
6322 UInt i;
6323
6324 OldRef_n = 0;
6325 for (i = 0; i <= N_OLDREF_ACCS; i++)
6326 OldRef_accs_n[i] = 0;
6327
6328 for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
6329 OldRef_n++;
6330 accs_n = 0;
6331 for (i = 0; i < N_OLDREF_ACCS; i++) {
6332 if (o->accs[i].thrid != 0)
6333 accs_n++;
6334 }
6335 OldRef_accs_n[accs_n]++;
6336 }
6337
6338 tl_assert(OldRef_n == oldrefTreeN);
6339 VG_(printf)( " libhb: oldrefTreeN %lu ", oldrefTreeN);
6340 VG_(printf)( "( ");
6341 for (i = 0; i <= N_OLDREF_ACCS; i++)
6342 VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
6343 VG_(printf)( ")\n");
6344 }
sewardjf98e1c02008-10-25 16:22:41 +00006345 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6346 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6347 stats__ctxt_rcdec2,
6348 stats__ctxt_rcdec3 );
6349 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6350 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
philippecabdbb52015-04-20 21:33:16 +00006351 VG_(printf)( " libhb: contextTab: %lu slots,"
6352 " %lu cur ents(ref'd %lu),"
philippe06bc23a2015-04-17 21:19:43 +00006353 " %lu max ents\n",
sewardjf98e1c02008-10-25 16:22:41 +00006354 (UWord)N_RCEC_TAB,
philippecabdbb52015-04-20 21:33:16 +00006355 stats__ctxt_tab_curr, RCEC_referenced,
6356 stats__ctxt_tab_max );
philippe47124e92015-04-25 14:00:24 +00006357 {
6358# define MAXCHAIN 10
6359 UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
6360 UInt non0chain = 0;
6361 UInt n;
6362 UInt i;
6363 RCEC *p;
6364
6365 for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
6366 for (i = 0; i < N_RCEC_TAB; i++) {
6367 n = 0;
6368 for (p = contextTab[i]; p; p = p->next)
6369 n++;
6370 if (n < MAXCHAIN)
6371 chains[n]++;
6372 else
6373 chains[MAXCHAIN]++;
6374 if (n > 0)
6375 non0chain++;
6376 }
6377 VG_(printf)( " libhb: contextTab chain of [length]=nchain."
6378 " Avg chain len %3.1f\n"
6379 " ",
6380 (Double)stats__ctxt_tab_curr
6381 / (Double)(non0chain ? non0chain : 1));
6382 for (i = 0; i <= MAXCHAIN; i++) {
6383 if (chains[i] != 0)
6384 VG_(printf)( "[%d%s]=%d ",
6385 i, i == MAXCHAIN ? "+" : "",
6386 chains[i]);
6387 }
6388 VG_(printf)( "\n");
6389# undef MAXCHAIN
6390 }
sewardjf98e1c02008-10-25 16:22:41 +00006391 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6392 stats__ctxt_tab_qs,
6393 stats__ctxt_tab_cmps );
6394#if 0
6395 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6396 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6397 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6398 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6399 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6400 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6401 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6402 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6403 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6404 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6405 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6406 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6407 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6408 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6409
6410 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6411 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6412 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6413 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6414#endif
6415
6416 VG_(printf)("%s","<<< END libhb stats >>>\n");
6417 VG_(printf)("%s","\n");
6418
6419 }
6420}
6421
sewardjffce8152011-06-24 10:09:41 +00006422/* Receive notification that a thread has low-level exited. The
6423 significance here is that we do not expect to see any more memory
6424 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006425void libhb_async_exit ( Thr* thr )
6426{
sewardj23f12002009-07-24 08:45:08 +00006427 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006428 tl_assert(!thr->llexit_done);
6429 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006430
6431 /* free up Filter and local_Kws_n_stacks (well, actually not the
6432 latter ..) */
6433 tl_assert(thr->filter);
6434 HG_(free)(thr->filter);
6435 thr->filter = NULL;
6436
sewardjffce8152011-06-24 10:09:41 +00006437 /* Tell the VTS mechanism this thread has exited, so it can
6438 participate in VTS pruning. Note this can only happen if the
 6439 thread has both ll_exited and been joined with. */
6440 if (thr->joinedwith_done)
6441 VTS__declare_thread_very_dead(thr);
6442
sewardj2d2ea2f2009-08-02 10:15:07 +00006443 /* Another space-accuracy tradeoff. Do we want to be able to show
6444 H1 history for conflicts in threads which have since exited? If
 6445 yes, then we had better not free up thr->local_Kws_n_stacks. The
6446 downside is a potential per-thread leak of up to
6447 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6448 XArray average overcommit factor is (1.5 I'd guess). */
6449 // hence:
6450 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6451 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006452}
6453
sewardjffce8152011-06-24 10:09:41 +00006454/* Receive notification that a thread has been joined with. The
6455 significance here is that we do not expect to see any further
6456 references to its vector clocks (Thr::viR and Thr::viW). */
6457void libhb_joinedwith_done ( Thr* thr )
6458{
6459 tl_assert(thr);
6460 /* Caller must ensure that this is only ever called once per Thr. */
6461 tl_assert(!thr->joinedwith_done);
6462 thr->joinedwith_done = True;
6463 if (thr->llexit_done)
6464 VTS__declare_thread_very_dead(thr);
6465}
6466
6467
sewardjf98e1c02008-10-25 16:22:41 +00006468/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6469 a Seg that points at a VTS is its one-and-only owner, and ditto for
 6470 an SO that points at a VTS. */
6471
6472SO* libhb_so_alloc ( void )
6473{
6474 return SO__Alloc();
6475}
6476
6477void libhb_so_dealloc ( SO* so )
6478{
6479 tl_assert(so);
6480 tl_assert(so->magic == SO_MAGIC);
6481 SO__Dealloc(so);
6482}
6483
6484/* See comments in libhb.h for details on the meaning of
6485 strong vs weak sends and strong vs weak receives. */
6486void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6487{
6488 /* Copy the VTSs from 'thr' into the sync object, and then move
6489 the thread along one step. */
6490
6491 tl_assert(so);
6492 tl_assert(so->magic == SO_MAGIC);
6493
6494 /* stay sane .. a thread's read-clock must always lead or be the
6495 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006496 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6497 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006498 }
6499
6500 /* since we're overwriting the VtsIDs in the SO, we need to drop
6501 any references made by the previous contents thereof */
6502 if (so->viR == VtsID_INVALID) {
6503 tl_assert(so->viW == VtsID_INVALID);
6504 so->viR = thr->viR;
6505 so->viW = thr->viW;
6506 VtsID__rcinc(so->viR);
6507 VtsID__rcinc(so->viW);
6508 } else {
6509 /* In a strong send, we dump any previous VC in the SO and
6510 install the sending thread's VC instead. For a weak send we
6511 must join2 with what's already there. */
6512 tl_assert(so->viW != VtsID_INVALID);
6513 VtsID__rcdec(so->viR);
6514 VtsID__rcdec(so->viW);
6515 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6516 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6517 VtsID__rcinc(so->viR);
6518 VtsID__rcinc(so->viW);
6519 }
6520
6521 /* move both parent clocks along */
6522 VtsID__rcdec(thr->viR);
6523 VtsID__rcdec(thr->viW);
6524 thr->viR = VtsID__tick( thr->viR, thr );
6525 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006526 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006527 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006528 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006529 }
sewardjf98e1c02008-10-25 16:22:41 +00006530 VtsID__rcinc(thr->viR);
6531 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006532
sewardjf98e1c02008-10-25 16:22:41 +00006533 if (strong_send)
6534 show_thread_state("s-send", thr);
6535 else
6536 show_thread_state("w-send", thr);
6537}
6538
6539void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6540{
6541 tl_assert(so);
6542 tl_assert(so->magic == SO_MAGIC);
6543
6544 if (so->viR != VtsID_INVALID) {
6545 tl_assert(so->viW != VtsID_INVALID);
6546
 6547 /* Weak receive (basically, an R-acquisition of an R-W lock).
6548 This advances the read-clock of the receiver, but not the
6549 write-clock. */
6550 VtsID__rcdec(thr->viR);
6551 thr->viR = VtsID__join2( thr->viR, so->viR );
6552 VtsID__rcinc(thr->viR);
6553
sewardj90eb22e2009-07-28 20:22:18 +00006554 /* At one point (r10589) it seemed safest to tick the clocks for
6555 the receiving thread after the join. But on reflection, I
6556 wonder if that might cause it to 'overtake' constraints,
6557 which could lead to missing races. So, back out that part of
6558 r10589. */
6559 //VtsID__rcdec(thr->viR);
6560 //thr->viR = VtsID__tick( thr->viR, thr );
6561 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006562
sewardjf98e1c02008-10-25 16:22:41 +00006563 /* For a strong receive, we also advance the receiver's write
6564 clock, which means the receive as a whole is essentially
 6565 equivalent to a W-acquisition of an R-W lock. */
6566 if (strong_recv) {
6567 VtsID__rcdec(thr->viW);
6568 thr->viW = VtsID__join2( thr->viW, so->viW );
6569 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006570
sewardj90eb22e2009-07-28 20:22:18 +00006571 /* See comment just above, re r10589. */
6572 //VtsID__rcdec(thr->viW);
6573 //thr->viW = VtsID__tick( thr->viW, thr );
6574 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006575 }
6576
sewardjf4845dc2010-05-28 20:09:59 +00006577 if (thr->filter)
6578 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006579 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006580
sewardjf98e1c02008-10-25 16:22:41 +00006581 if (strong_recv)
6582 show_thread_state("s-recv", thr);
6583 else
6584 show_thread_state("w-recv", thr);
6585
6586 } else {
6587 tl_assert(so->viW == VtsID_INVALID);
6588 /* Deal with degenerate case: 'so' has no vts, so there has been
6589 no message posted to it. Just ignore this case. */
6590 show_thread_state("d-recv", thr);
6591 }
6592}
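#if 0
/* Sketch, not compiled: how a client might use an SO to model a
   mutex. A strong send at unlock time, followed by a strong receive
   at the next lock acquisition, creates the expected happens-before
   edge from the unlocking thread to the locking thread. The function
   names are illustrative only. */
static void example_on_unlock ( Thr* thr, SO* so_of_mutex ) {
   libhb_so_send( thr, so_of_mutex, True/*strong*/ );
}
static void example_on_lock_acquired ( Thr* thr, SO* so_of_mutex ) {
   libhb_so_recv( thr, so_of_mutex, True/*strong*/ );
}
#endif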
6593
6594Bool libhb_so_everSent ( SO* so )
6595{
6596 if (so->viR == VtsID_INVALID) {
6597 tl_assert(so->viW == VtsID_INVALID);
6598 return False;
6599 } else {
6600 tl_assert(so->viW != VtsID_INVALID);
6601 return True;
6602 }
6603}
6604
6605#define XXX1 0 // 0x67a106c
6606#define XXX2 0
6607
sewardj23f12002009-07-24 08:45:08 +00006608static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006609 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6610 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6611 return False;
6612}
florian0c8a47c2013-10-01 20:10:21 +00006613static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006614{
sewardj23f12002009-07-24 08:45:08 +00006615 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006616 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6617 show_thread_state("", thr);
6618 VG_(printf)("%s","\n");
6619}
6620
sewardj23f12002009-07-24 08:45:08 +00006621void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006622{
6623 SVal sv = SVal__mkC(thr->viW, thr->viW);
6624 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006625 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6626 zsm_sset_range( a, szB, sv );
6627 Filter__clear_range( thr->filter, a, szB );
6628 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006629}
6630
sewardjfd35d492011-03-17 19:39:55 +00006631void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006632{
sewardj23f12002009-07-24 08:45:08 +00006633 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006634}
6635
philippef54cb662015-05-10 22:19:31 +00006636
 6637/* Set the lines zix_start through zix_end to NOACCESS. */
6638static void zsm_secmap_line_range_noaccess (SecMap *sm,
6639 UInt zix_start, UInt zix_end)
6640{
6641 for (UInt lz = zix_start; lz <= zix_end; lz++) {
6642 LineZ* lineZ;
6643 LineF* lineF;
6644 lineZ = &sm->linesZ[lz];
6645 if (lineZ->dict[0] != SVal_INVALID) {
6646 rcdec_LineZ(lineZ);
6647 } else {
6648 UInt fix = (UInt)lineZ->dict[1];
6649 tl_assert(sm->linesF);
6650 tl_assert(sm->linesF_size > 0);
6651 tl_assert(fix >= 0 && fix < sm->linesF_size);
6652 lineF = &sm->linesF[fix];
6653 rcdec_LineF(lineF);
6654 lineF->inUse = False;
6655 }
6656 lineZ->dict[0] = SVal_NOACCESS;
6657 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
6658 for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
6659 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
6660 }
6661}
6662
6663/* Set the given range to SVal_NOACCESS in-place in the secmap.
6664 a must be cacheline aligned. len must be a multiple of a cacheline
6665 and must be < N_SECMAP_ARANGE. */
6666static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
6667{
6668 tl_assert (is_valid_scache_tag (a));
6669 tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
6670 tl_assert (len < N_SECMAP_ARANGE);
6671
6672 SecMap *sm1 = shmem__find_SecMap (a);
6673 SecMap *sm2 = shmem__find_SecMap (a + len - 1);
6674 UWord zix_start = shmem__get_SecMap_offset(a ) >> N_LINE_BITS;
6675 UWord zix_end = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;
6676
6677 if (sm1) {
6678 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
6679 zsm_secmap_line_range_noaccess (sm1, zix_start,
6680 sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
6681 }
6682 if (sm2 && sm1 != sm2) {
6683 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
6684 zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
6685 }
6686}
6687
6688/* Set the given address range to SVal_NOACCESS.
 6689 The SecMaps fully set to SVal_NOACCESS will be pushed onto SecMap_freelist. */
6690static void zsm_sset_range_noaccess (Addr addr, SizeT len)
6691{
6692 /*
6693 BPC = Before, Partial Cacheline, = addr
6694 (i.e. starting inside a cacheline/inside a SecMap)
6695 BFC = Before, Full Cacheline(s), but not full SecMap
6696 (i.e. starting inside a SecMap)
6697 FSM = Full SecMap(s)
6698 (i.e. starting a SecMap)
6699 AFC = After, Full Cacheline(s), but not full SecMap
6700 (i.e. first address after the full SecMap(s))
6701 APC = After, Partial Cacheline, i.e. first address after the
6702 full CacheLines).
6703 ARE = After Range End = addr+len = first address not part of the range.
6704
6705 If addr starts a Cacheline, then BPC == BFC.
6706 If addr starts a SecMap, then BPC == BFC == FSM.
6707 If addr+len starts a SecMap, then APC == ARE == AFC
6708 If addr+len starts a Cacheline, then APC == ARE
6709 */
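   /* Worked example (illustrative figures; assumes N_LINE_ARANGE ==
      0x40 and N_SECMAP_ARANGE == 0x2000, as in a typical build): for
      addr == 0x11010 and len == 0x3100, ARE == 0x14110 and
         BPC == 0x11010  BPClen == 0x30    (via the cache)
         BFC == 0x11040  BFClen == 0xFC0   (in-place in its SecMap)
         FSM == 0x12000  FSMlen == 0x2000  (one whole SecMap, freed)
         AFC == 0x14000  AFClen == 0x100   (in-place in its SecMap)
         APC == 0x14100  APClen == 0x10    (via the cache)
      and the five lengths sum to 0x3100 == len. */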
6710 Addr ARE = addr + len;
6711 Addr BPC = addr;
6712 Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
6713 Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
6714 Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
6715 Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
6716 SizeT Plen = len; // Plen will be split between the following:
6717 SizeT BPClen;
6718 SizeT BFClen;
6719 SizeT FSMlen;
6720 SizeT AFClen;
6721 SizeT APClen;
6722
6723 /* Consumes from Plen the nr of bytes between from and to.
6724 from and to must be aligned on a multiple of round.
6725 The length consumed will be a multiple of round, with
6726 a maximum of Plen. */
6727# define PlenCONSUME(from, to, round, consumed) \
6728 do { \
6729 if (from < to) { \
6730 if (to - from < Plen) \
6731 consumed = to - from; \
6732 else \
6733 consumed = ROUNDDN(Plen, round); \
6734 } else { \
6735 consumed = 0; \
6736 } \
6737 Plen -= consumed; } while (0)
6738
6739 PlenCONSUME(BPC, BFC, 1, BPClen);
6740 PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
6741 PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
6742 PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
6743 PlenCONSUME(APC, ARE, 1, APClen);
6744
6745 if (0)
6746 VG_(printf) ("addr %p[%ld] ARE %p"
6747 " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
6748 " AFC %p[%ld] APC %p[%ld]\n",
6749 (void*)addr, len, (void*)ARE,
6750 (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
6751 (void*)AFC, AFClen, (void*)APC, APClen);
6752
6753 tl_assert (Plen == 0);
6754
6755 /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */
6756
6757 /* First we set the partial cachelines. This is done through the cache. */
6758 if (BPClen > 0)
6759 zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
6760 if (APClen > 0)
6761 zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);
6762
6763 /* After this, we will not use the cache anymore. We will directly work
6764 in-place on the z shadow memory in SecMap(s).
6765 So, we invalidate the cachelines for the whole range we are setting
6766 to NOACCESS below. */
6767 shmem__invalidate_scache_range (BFC, APC - BFC);
6768
6769 if (BFClen > 0)
6770 zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
6771 if (AFClen > 0)
6772 zsm_sset_range_noaccess_in_secmap (AFC, AFClen);
6773
6774 if (FSMlen > 0) {
6775 /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
6776 free list. */
6777 Addr sm_start = FSM;
6778 while (sm_start < AFC) {
6779 SecMap *sm = shmem__find_SecMap (sm_start);
6780 if (sm) {
6781 Addr gaKey;
6782 SecMap *fm_sm;
6783
6784 if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
6785 for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
6786 if (sm->linesZ[lz].dict[0] != SVal_INVALID)
6787 rcdec_LineZ(&sm->linesZ[lz]);
6788 }
6789 for (UInt lf = 0; lf < sm->linesF_size; lf++) {
6790 if (sm->linesF[lf].inUse)
6791 rcdec_LineF (&sm->linesF[lf]);
6792 }
6793 if (sm->linesF_size > 0) {
6794 HG_(free)(sm->linesF);
6795 stats__secmap_linesF_allocd -= sm->linesF_size;
6796 stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
6797 }
6798 if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
6799 tl_assert (0);
6800 stats__secmaps_in_map_shmem--;
6801 tl_assert (gaKey == sm_start);
6802 tl_assert (sm == fm_sm);
6803 stats__secmaps_ssetGCed++;
6804 push_SecMap_on_freelist (sm);
6805 }
6806 sm_start += N_SECMAP_ARANGE;
6807 }
6808 tl_assert (sm_start == AFC);
6809
 6810 /* The above loop might have kept copies of freed SecMaps in the smCache,
 6811 so clear them out. */
6812 if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
6813 smCache[0].gaKey = 1;
6814 smCache[0].sm = NULL;
6815 }
6816 if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
6817 smCache[1].gaKey = 1;
6818 smCache[1].sm = NULL;
6819 }
6820 if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
6821 smCache[2].gaKey = 1;
6822 smCache[2].sm = NULL;
6823 }
6824 STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
6825 }
6826}
6827
sewardjfd35d492011-03-17 19:39:55 +00006828void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6829{
6830 /* This really does put the requested range in NoAccess. It's
6831 expensive though. */
6832 SVal sv = SVal_NOACCESS;
6833 tl_assert(is_sane_SVal_C(sv));
philippef54cb662015-05-10 22:19:31 +00006834 if (LIKELY(szB < 2 * N_LINE_ARANGE))
6835 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
6836 else
6837 zsm_sset_range_noaccess (a, szB);
sewardjfd35d492011-03-17 19:39:55 +00006838 Filter__clear_range( thr->filter, a, szB );
6839}
6840
philippef54cb662015-05-10 22:19:31 +00006841/* Works a byte at a time. Can be optimised if needed. */
6842UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
6843{
6844 UWord anr = 0; // nr of bytes addressable.
6845
 6846 /* Get the accessibility of each byte. Take care not to create a
 6847 SecMap or LineZ when checking whether a byte is addressable.
 6848
 6849 Note: this is used for client requests, so performance is deemed
 6850 non-critical. For simplicity, we work byte by byte.
 6851 Performance could be improved by working with full cachelines
 6852 or full SecMaps on reaching a cacheline or secmap boundary. */
6853 for (SizeT i = 0; i < len; i++) {
6854 SVal sv = SVal_INVALID;
6855 Addr b = a + i;
6856 Addr tag = b & ~(N_LINE_ARANGE - 1);
6857 UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
6858 UWord cloff = get_cacheline_offset(b);
6859
6860 /* Note: we do not use get_cacheline(b) to avoid creating cachelines
6861 and/or SecMap for non addressable bytes. */
6862 if (tag == cache_shmem.tags0[wix]) {
6863 CacheLine copy = cache_shmem.lyns0[wix];
6864 /* We work on a copy of the cacheline, as we do not want to
6865 record the client request as a real read.
6866 The below is somewhat similar to zsm_sapply08__msmcread but
6867 avoids side effects on the cache. */
6868 UWord toff = get_tree_offset(b); /* == 0 .. 7 */
6869 UWord tno = get_treeno(b);
6870 UShort descr = copy.descrs[tno];
6871 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
6872 SVal* tree = &copy.svals[tno << 3];
6873 copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
6874 }
6875 sv = copy.svals[cloff];
6876 } else {
6877 /* Byte not found in the cacheline. Search for a SecMap. */
6878 SecMap *sm = shmem__find_SecMap(b);
6879 LineZ *lineZ;
6880 if (sm == NULL)
6881 sv = SVal_NOACCESS;
6882 else {
6883 UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
6884 lineZ = &sm->linesZ[zix];
6885 if (lineZ->dict[0] == SVal_INVALID) {
6886 UInt fix = (UInt)lineZ->dict[1];
6887 sv = sm->linesF[fix].w64s[cloff];
6888 } else {
6889 UWord ix = read_twobit_array( lineZ->ix2s, cloff );
6890 sv = lineZ->dict[ix];
6891 }
6892 }
6893 }
6894
6895 tl_assert (sv != SVal_INVALID);
6896 if (sv == SVal_NOACCESS) {
6897 if (abits)
6898 abits[i] = 0x00;
6899 } else {
6900 if (abits)
6901 abits[i] = 0xff;
6902 anr++;
6903 }
6904 }
6905
6906 return anr;
6907}
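#if 0
/* Usage sketch, not compiled: a client-request handler might query
   the accessibility of a buffer like this. 'buf' and 'nbytes' are
   illustrative names only. */
UChar abits[nbytes];
UWord n_addressable = libhb_srange_get_abits( (Addr)buf, abits, nbytes );
/* Now abits[i] == 0xff iff buf[i] is addressable, and n_addressable
   is the number of such bytes. */
#endif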
6908
6909
sewardj406bac82010-03-03 23:03:40 +00006910void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6911{
6912 SVal sv = SVal_NOACCESS;
6913 tl_assert(is_sane_SVal_C(sv));
6914 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
philippef54cb662015-05-10 22:19:31 +00006915 if (LIKELY(szB < 2 * N_LINE_ARANGE))
6916 zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
6917 else
6918 zsm_sset_range_noaccess (a, szB);
sewardj406bac82010-03-03 23:03:40 +00006919 Filter__clear_range( thr->filter, a, szB );
6920 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6921}
6922
sewardj0b20a152011-03-10 21:34:21 +00006923Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00006924 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00006925 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006926}
6927
sewardj0b20a152011-03-10 21:34:21 +00006928void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00006929 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00006930 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006931}
6932
sewardj23f12002009-07-24 08:45:08 +00006933void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00006934{
sewardj23f12002009-07-24 08:45:08 +00006935 zsm_scopy_range(src, dst, len);
6936 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00006937}
6938
6939void libhb_maybe_GC ( void )
6940{
philippecabdbb52015-04-20 21:33:16 +00006941 /* GC the unreferenced (zero rc) RCECs when
philippee0829e02015-04-21 20:55:40 +00006942 (1) we have reached a significant nr of RCECs (so as to avoid
 6943 scanning a contextTab of mostly NULL ptrs),
 6944 and (2) we are approaching the max nr of RCECs (we have in any case
 6945 at least that many RCECs in the pool allocator).
 6946 Note: the margin avoids a small but constant creep in the max nr
 6947 of RCECs, caused by the fact that libhb_maybe_GC is not called
 6948 when the current nr of RCECs exactly reaches the max,
 6949 and (3) the nr of referenced RCECs is less than 75% of the total nr of RCECs.
 6950 Keeping the nr of RCECs from growing too much keeps memory use low
 6951 and avoids having too many elements in the (fixed) contextTab hashtable.
 6952 */
philippecabdbb52015-04-20 21:33:16 +00006953 if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
philippee0829e02015-04-21 20:55:40 +00006954 && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
philippef54cb662015-05-10 22:19:31 +00006955 && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
philippecabdbb52015-04-20 21:33:16 +00006956 do_RCEC_GC();
philippe158404e2015-04-10 19:34:14 +00006957
philippef54cb662015-05-10 22:19:31 +00006958 /* If there are still no entries available (all the table entries are full),
 6959 and we hit the threshold point, then do a GC. */
6960 Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
6961 && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
6962 if (UNLIKELY (vts_tab_GC))
6963 vts_tab__do_GC( False/*don't show stats*/ );
6964
6965 /* scan GC the SecMaps when
6966 (1) no SecMap in the freelist
6967 and (2) the current nr of live secmaps exceeds the threshold. */
6968 if (UNLIKELY(SecMap_freelist == NULL
6969 && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
6970 // If we did a vts tab GC, then no need to flush the cache again.
6971 if (!vts_tab_GC)
6972 zsm_flush_cache();
6973 shmem__SecMap_do_GC(True);
6974 }
philippecabdbb52015-04-20 21:33:16 +00006975
6976 /* Check the reference counts (expensive) */
6977 if (CHECK_CEM)
6978 event_map__check_reference_counts();
sewardjf98e1c02008-10-25 16:22:41 +00006979}
6980
6981
6982/////////////////////////////////////////////////////////////////
6983/////////////////////////////////////////////////////////////////
6984// //
6985// SECTION END main library //
6986// //
6987/////////////////////////////////////////////////////////////////
6988/////////////////////////////////////////////////////////////////
6989
6990/*--------------------------------------------------------------------*/
6991/*--- end libhb_main.c ---*/
6992/*--------------------------------------------------------------------*/