/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

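/* Illustrative sketch, not part of the original file: the comments
   above imply that a "constraint" SVal carries two VtsIDs plus a
   2-bit tag in bits 63..62, with one VtsID in the lowest 30 bits.
   The concrete layout below is an assumption; the layout actually
   used by SVal__mkC later in this file may differ. */
#if 0
static inline SVal example_SVal_mkC ( VtsID rmini, VtsID wmini )
{
   /* hypothetical: tag in bits 63..62, rmini in 61..32, wmini in 29..0 */
   return (1ULL << 62) | ((ULong)rmini << 32) | (ULong)wmini;
}
#endif
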
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)

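/* Illustrative sketch, not part of the original file: a hypothetical
   ScalarTS constructor, just to make the packing above concrete.  The
   bitfields do the packing implicitly; the asserts restate NB2 and
   ThrID_MAX_VALID. */
#if 0
static inline ScalarTS example_mk_ScalarTS ( ThrID thrid, ULong tym )
{
   ScalarTS ts;
   tl_assert(thrid >= 1024 && thrid <= ThrID_MAX_VALID); /* see NB2 */
   ts.thrid = thrid;   /* 18 bits */
   ts.tym   = tym;     /* 46 bits */
   return ts;
}
#endif
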
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;

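/* Illustrative sketch, not part of the original file: how a filter
   lookup might decompose an address, per the layout described above.
   'example_fi_rw_bits' is a hypothetical helper, not the real filter
   API used elsewhere in this file. */
#if 0
static inline UWord example_fi_rw_bits ( Filter* fi, Addr a )
{
   UWord lineno  = FI_GET_LINENO(a);
   UWord byteoff = a & (FI_LINE_SZB - 1);       /* 0 .. FI_LINE_SZB-1 */
   if (fi->tags[lineno] != FI_GET_TAG(a))
      return 0;                                 /* line not present */
   /* bit 1 of the result = R seen, bit 0 = W seen, for this byte */
   return (fi->lines[lineno].u16s[byteoff / 8]
           >> (2 * (byteoff % 8))) & 3;
}
#endif
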

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );
/* SVals in LineZ are used to store various pointers. */
static inline void *SVal2Ptr (SVal s);
static inline SVal  Ptr2SVal (void* ptr);

/* A doubly-linked list of all the SO's. */
SO* admin_SO;


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals
   stored herein.  It is important to understand, however, that due to
   internal caching, the reference counts are in general inaccurate,
   and can be above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered to
   be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */

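/* Illustrative sketch, not part of the original file: the calling
   discipline implied by the comment above.  Reference counts deduced
   from SVal__rcinc/SVal__rcdec are exact only immediately after a
   cache flush. */
#if 0
static void example_exact_rc_point ( void )
{
   zsm_flush_cache();
   /* Only here may the caller treat items whose observed reference
      count is zero as genuinely unreferenced by this library. */
}
#endif
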

/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N) - 1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N) - 1))

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is a pointer to the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

/* LineZ.dict[1] is used to store various pointers:
   * In the first lineZ of a free SecMap, it points to the next free SecMap.
   * In a lineZ for which we need to use a lineF, it points to the lineF. */

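/* Illustrative sketch, not part of the original file: how a byte's
   SVal would be recovered from a LineZ that is genuinely in
   compressed (Z) rep, i.e. dict[0] != SVal_INVALID.  This mirrors
   read_twobit_array further down; 'example_lineZ_read' is a
   hypothetical helper. */
#if 0
static inline SVal example_lineZ_read ( LineZ* lineZ, UWord byteoff )
{
   /* 2-bit dictionary index for byte 'byteoff', 0 .. N_LINE_ARANGE-1 */
   UWord ix = (lineZ->ix2s[byteoff >> 2] >> (2 * (byteoff & 3))) & 3;
   return lineZ->dict[ix];
}
#endif
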
typedef
   struct {
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* We use a pool allocator for LineF, as LineF is relatively small,
   and we will often alloc/release such lines. */
static PoolAlloc* LineF_pool_allocator;

/* SVals in a lineZ are used to store various pointers.
   Below are conversion functions to support that. */
static inline LineF *LineF_Ptr (LineZ *lineZ)
{
   tl_assert(lineZ->dict[0] == SVal_INVALID);
   return SVal2Ptr (lineZ->dict[1]);
}

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) a pointer to a LineF
   that holds the full representation.

   As each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to a lineF should equal
   the number of used lineFs.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use

   Hence the following actions at the following transitions are required:

   F rep: alloc'd -> freed                     -- rcdec_LineF
   F rep:         -> alloc'd                   -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID  -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other  -- rcinc_LineZ
*/

typedef
   struct {
      UInt magic;
      LineZ linesZ[N_SECMAP_ZLINES];
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}

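/* Illustrative sketch, not part of the original file: emptying a
   Cache as the comment above describes, by giving every entry a tag
   that no naturally-aligned line can have.
   'example_invalidate_scache' is a hypothetical name. */
#if 0
static void example_invalidate_scache ( Cache* c )
{
   UWord i;
   for (i = 0; i < N_WAY_NENT; i++)
      c->tags0[i] = 1; /* bogus tag => guaranteed miss */
}
#endif
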

/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC
static UWord stats__vts_pruning          = 0; // # nr of vts pruning

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}

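/* Illustrative sketch, not part of the original file: how an address
   decomposes under the constants above (N_SECMAP_BITS == 13,
   N_LINE_BITS == 6) -- bits [12:6] select the LineZ within a SecMap,
   bits [5:0] the byte within the line.  'example_zline_index' is a
   hypothetical helper. */
#if 0
static inline UWord example_zline_index ( Addr a )
{
   UWord smoff = shmem__get_SecMap_offset(a);  /* a & 0x1FFF */
   return smoff >> N_LINE_BITS;                /* 0 .. N_SECMAP_ZLINES-1 */
}
#endif
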
/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become fully SVal_NOACCESS are inserted in a list
   of recycled SecMaps.  When a new SecMap is needed, a recycled SecMap
   will be used in preference to allocating a new SecMap. */
/* We make a linked list of SecMaps.  The first LineZ is re-used to
   implement the linked list. */
/* Returns the SecMap following sm in the free list.
   NULL if sm is the last SecMap.  sm must be on the free list. */
static inline SecMap *SecMap_freelist_next ( SecMap* sm )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   return SVal2Ptr (sm->linesZ[0].dict[1]);
}
static inline void set_SecMap_freelist_next ( SecMap* sm, SecMap* next )
{
   tl_assert (sm);
   tl_assert (sm->magic == SecMap_free_MAGIC);
   tl_assert (next == NULL || next->magic == SecMap_free_MAGIC);
   sm->linesZ[0].dict[1] = Ptr2SVal (next);
}

static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord   n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = SecMap_freelist_next (sm);
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   set_SecMap_freelist_next(sm, SecMap_freelist);
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = SecMap_freelist_next (sm);
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan the SecMaps and count how many could be GC-ed.
   If 'really' is True, actually GC them. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      UWord   n_linesF = 0;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs and the possible LineF of a LineZ. */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            ok_to_GC = lineZ->dict[0] == SVal_NOACCESS
               && !SVal__isC (lineZ->dict[1])
               && !SVal__isC (lineZ->dict[2])
               && !SVal__isC (lineZ->dict[3]);
         } else {
            LineF *lineF = LineF_Ptr(lineZ);
            n_linesF++;
            for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
               ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
         }
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the iteration
            on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all SVal_NOACCESS.
            We just need to free the lineFs referenced by the linesZ. */
         if (n_linesF > 0) {
            for (i = 0; i < N_SECMAP_ZLINES && n_linesF > 0; i++) {
               LineZ* lineZ = &sm->linesZ[i];
               if (lineZ->dict[0] == SVal_INVALID) {
                  VG_(freeEltPA)( LineF_pool_allocator, LineF_Ptr(lineZ) );
                  n_linesF--;
               }
            }
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%, in which case we allow 10% more
         allocation before GCing again.  This avoids doing a lot of
         costly GC for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
             perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         that allocates around 30GB of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}

/* Returns the nr of lineFs which are in use.  Note: this is scanning
   the secmap wordFM, so it is to be used for statistics only. */
__attribute__((noinline))
static UWord shmem__SecMap_used_linesF(void)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord inUse = 0;

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);

      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] == SVal_INVALID)
            inUse++;
      }
   }
   VG_(doneIterFM)( map_shmem );

   return inUse;
}

/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}

/* We cache one free lineF, to avoid pool allocator calls.
   Measurement on firefox has shown that this avoids more than 90%
   of the PA calls. */
static LineF *free_lineF = NULL;

/* Allocates a lineF for LineZ.  Sets lineZ in a state indicating
   lineF has to be used. */
static inline LineF *alloc_LineF_for_Z (LineZ *lineZ)
{
   LineF *lineF;

   tl_assert(lineZ->dict[0] == SVal_INVALID);

   if (LIKELY(free_lineF)) {
      lineF = free_lineF;
      free_lineF = NULL;
   } else {
      lineF = VG_(allocEltPA) ( LineF_pool_allocator );
   }
   lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   lineZ->dict[1] = Ptr2SVal (lineF);

   return lineF;
}

/* rcdec the LineF of lineZ, frees the lineF, and sets lineZ
   back to its initial state SVal_NOACCESS (i.e. ready to be
   read or written just after SecMap allocation). */
static inline void clear_LineF_of_Z (LineZ *lineZ)
{
   LineF *lineF = LineF_Ptr(lineZ);

   rcdec_LineF(lineF);
   if (UNLIKELY(free_lineF)) {
      VG_(freeEltPA)( LineF_pool_allocator, lineF );
   } else {
      free_lineF = lineF;
   }
   lineZ->dict[0] = SVal_NOACCESS;
   lineZ->dict[1] = SVal_INVALID;
}

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      lineF = LineF_Ptr (lineZ);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec is done on the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   /* re RCs, we are about to park new data in this LineZ, so its
      current representation must be rcdec'd accordingly, via
      rcdec_LineZ or clear_LineF_of_Z. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID)
      clear_LineF_of_Z(lineZ);
   else
      rcdec_LineZ(lineZ);
   *smp  = sm;
   *zixp = zix;
}

/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
             +--------------------------------- TREE_DESCR_8_7
             |             +------------------- TREE_DESCR_8_0
             |             |  +---------------- TREE_DESCR_16_3
             |             |  | +-------------- TREE_DESCR_32_1
             |             |  | | +------------ TREE_DESCR_16_2
             |             |  | | |  +--------- TREE_DESCR_64
             |             |  | | |  |  +------ TREE_DESCR_16_1
             |             |  | | |  |  | +---- TREE_DESCR_32_0
             |             |  | | |  |  | | +-- TREE_DESCR_16_0
             |             |  | | |  |  | | |
             |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
      /* INVALID - any valid descr produces at least one
         valid bit in tree[0..7] */
   }
   /* NOTREACHED */
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

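/* Illustrative examples, not part of the original file, of the
   mapping implemented above: a lone 64-bit node validates byte 0
   only; a fully byte-split tree validates all eight bytes. */
#if 0
static void example_descr_to_validbits ( void )
{
   tl_assert(descr_to_validbits(TREE_DESCR_64) == 0x01);
   tl_assert(descr_to_validbits(TREE_DESCR_8_7 | TREE_DESCR_8_6
                                | TREE_DESCR_8_5 | TREE_DESCR_8_4
                                | TREE_DESCR_8_3 | TREE_DESCR_8_2
                                | TREE_DESCR_8_1 | TREE_DESCR_8_0)
             == 0xFF);
}
#endif
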
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}

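/* Worked example, not part of the original file: if all eight leaves
   hold the same SVal, the three merge layers above collapse the tree
   to a single 64-bit node. */
#if 0
static void example_normalise_tree ( void )
{
   SVal   tree[8];
   UShort descr;
   Word   i;
   for (i = 0; i < 8; i++) tree[i] = SVal_NOACCESS;
   descr = normalise_tree(tree);
   tl_assert(descr == TREE_DESCR_64);       /* fully merged */
   tl_assert(tree[0] == SVal_NOACCESS);     /* the one live node */
   for (i = 1; i < 8; i++)
      tl_assert(tree[i] == SVal_INVALID);   /* leaves subsumed */
}
#endif
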
/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


1398typedef struct { UChar count; SVal sval; } CountedSVal;
1399
1400static
1401void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1402 /*OUT*/Word* dstUsedP,
1403 Word nDst, CacheLine* src )
1404{
1405 Word tno, cloff, dstUsed;
1406
1407 tl_assert(nDst == N_LINE_ARANGE);
1408 dstUsed = 0;
1409
1410 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1411 UShort descr = src->descrs[tno];
1412 SVal* tree = &src->svals[cloff];
1413
1414 /* sequentialise the tree described by (descr,tree). */
1415# define PUT(_n,_v) \
1416 do { dst[dstUsed ].count = (_n); \
1417 dst[dstUsed++].sval = (_v); \
1418 } while (0)
1419
1420 /* byte 0 */
1421 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1422 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1423 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1424 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1425 /* byte 1 */
1426 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1427 /* byte 2 */
1428 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1429 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1430 /* byte 3 */
1431 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1432 /* byte 4 */
1433 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1434 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1435 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1436 /* byte 5 */
1437 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1438 /* byte 6 */
1439 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1440 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1441 /* byte 7 */
1442 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1443
1444# undef PUT
1445 /* END sequentialise the tree described by (descr,tree). */
1446
1447 }
1448 tl_assert(cloff == N_LINE_ARANGE);
1449 tl_assert(dstUsed <= nDst);
1450
1451 *dstUsedP = dstUsed;
1452}
1453
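/* An illustrative sketch (#if 0'd out): the output of
   sequentialise_CacheLine is a run-length encoding of the line, so
   the counts must total N_LINE_ARANGE whatever the tree shapes were.
   Only names defined in this file are used; the check itself is
   hypothetical. */
#if 0
static void example__sequentialise_totals ( CacheLine* cl )
{
   CountedSVal csvals[N_LINE_ARANGE];
   Word used = 0, k, total = 0;
   sequentialise_CacheLine( csvals, &used, N_LINE_ARANGE, cl );
   for (k = 0; k < used; k++) total += csvals[k].count;
   tl_assert(total == N_LINE_ARANGE); /* every byte accounted for */
}
#endif
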
/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8, 4, 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineF = alloc_LineF_for_Z (lineZ);
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

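/* A hedged sketch (#if 0'd out) of the Z-representation invariant the
   loop above establishes: each byte's shadow value is recoverable as
   dict[two-bit index].  It assumes read_twobit_array is the inverse
   of write_twobit_array, which is how cacheline_fetch below reads the
   line back. */
#if 0
static void example__z_rep_lookup ( LineZ* lineZ, Word byteno )
{
   UWord ix = read_twobit_array( lineZ->ix2s, byteno );
   tl_assert(ix <= 3);                         /* only 4 dictionary slots */
   tl_assert(lineZ->dict[ix] != SVal_INVALID); /* must map to a real SVal */
}
#endif
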
/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      normalise_CacheLine to build the descriptor array. */
   if (lineF) {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
         cl->svals[i] = lineZ->dict[ix];
         if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

/* Invalidate the cachelines corresponding to the given range, which
   must start and end on a cacheline boundary. */
static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
{
   Word wix;

   /* ga must be on a cacheline boundary. */
   tl_assert (is_valid_scache_tag (ga));
   /* szB must be a multiple of cacheline size. */
   tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));

   Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
   Word nwix = szB / N_LINE_ARANGE;

   if (nwix > N_WAY_NENT)
      nwix = N_WAY_NENT; // no need to check the same entry several times.

   for (wix = 0; wix < nwix; wix++) {
      if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
         cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
      ga_ix++;
      if (UNLIKELY(ga_ix == N_WAY_NENT))
         ga_ix = 0;
   }
}


static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

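/* An illustrative sketch (#if 0'd out): how an address decomposes for
   the direct-mapped shadow cache, using only the helpers and
   constants defined above.  The address itself is made up. */
#if 0
static void example__addr_decompose ( void )
{
   Addr  a   = (Addr)0x5001234; /* hypothetical address */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);              /* line identity */
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); /* cache slot */
   UWord off = get_cacheline_offset(a);               /* byte within line */
   /* tag and in-line offset together reconstitute the address, and
      the tree number/offset locate the byte within an 8-byte tree. */
   tl_assert((tag | off) == a);
   tl_assert(get_treeno(a) == off >> 3);
   tl_assert(get_tree_offset(a) == (a & 7));
   (void)wix;
}
#endif
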
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

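/* An illustrative sketch (#if 0'd out): pulling a fully-merged tree
   down so that byte 3 becomes individually addressable.  It uses only
   the functions and constants above; the value v is made up.  The
   expected descriptor follows from tracing the 64->32->16->8 splits. */
#if 0
static void example__pulldown_chain ( void )
{
   SVal tree[8];
   Word i;
   SVal v = (SVal)0x1234; /* hypothetical shadow value */
   UShort descr = TREE_DESCR_64;
   tree[0] = v;
   for (i = 1; i < 8; i++) tree[i] = SVal_INVALID;
   descr = pulldown_to_8( tree, 3, descr );
   tl_assert(descr == (TREE_DESCR_32_1 | TREE_DESCR_16_0
                       | TREE_DESCR_8_3 | TREE_DESCR_8_2));
   /* bytes 2 and 3 now carry v at the leaves; the copies made on the
      way down also left v at tree[4] (32_1) and tree[0] (16_0). */
   tl_assert(tree[2] == v && tree[3] == v && tree[4] == v);
}
#endif
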

static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

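/* The companion sketch (#if 0'd out) for the opposite direction:
   promoting bytes 0..3 of an all-leaves descriptor to a single 32-bit
   node.  Again only names from this file are used. */
#if 0
static void example__pullup ( void )
{
   UShort descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
                  | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
                  | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   descr = pullup_descr_to_32( descr, 0 );
   /* 8_0..8_3 first became 16_0 and 16_1, which then merged to 32_0;
      bytes 4..7 are untouched. */
   tl_assert(descr == (TREE_DESCR_8_7 | TREE_DESCR_8_6
                       | TREE_DESCR_8_5 | TREE_DESCR_8_4
                       | TREE_DESCR_32_0));
}
#endif
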
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   /* Invalidate all cache entries. */
   tl_assert(!is_valid_scache_tag(1));
   for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }

   LineF_pool_allocator = VG_(newPA) (
                             sizeof(LineF),
                             /* Nr elements/pool to fill a core arena block
                                taking some arena overhead into account. */
                             (4 * 1024 * 1024 - 200)/sizeof(LineF),
                             HG_(zalloc),
                             "libhb.LineF_storage.pool",
                             HG_(free)
                          );

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry. Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) tables.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.
   verydead_thread_table_not_pruned lists the identity of the threads
   that died since the previous round of pruning.
   Once pruning is done, these ThrIDs are added to verydead_thread_table.
   We don't actually need to keep the set of threads that have ever died --
   only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_tables_init ( void )
{
   tl_assert(!verydead_thread_table);
   tl_assert(!verydead_thread_table_not_pruned);
   verydead_thread_table
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.1",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   verydead_thread_table_not_pruned
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.2",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}

static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   UWord i;

   VG_(sortXA)( thrids );
   /* Sanity check: check that the ThrID values are unique. */
   UWord nBT = VG_(sizeXA)( thrids );
   if (nBT > 0) {
      ThrID thrid1, thrid2;
      thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
      for (i = 1; i < nBT; i++) {
         thrid1 = thrid2;
         thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
         tl_assert(thrid1 < thrid2);
      }
   }
   /* Ok, so the dead thread table thrids has unique and in-order keys. */
}

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.  The
   new array is sized exactly to hold the number of required elements.
   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS. */
static void VTS__show ( const VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
       There are 3 possibilities:
       (a) there is no next entry (we used them all up already):
           add (me_thrid,1) to the output, and quit
       (b) there is a next entry, and its thrid > me_thrid:
           add (me_thrid,1) to the output, then copy the remaining entries
       (c) there is a next entry, and its thrid == me_thrid:
           copy it to the output but increment its timestamp value.
           Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}

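/* A hedged sketch (#if 0'd out) of what VTS__tick guarantees: 'me's
   timestamp rises by exactly one and nothing else changes.  VTS__new
   and VTS__indexAt_SLOW are the real functions from this file; the
   scenario is hypothetical and ignores the VtsID bookkeeping used
   elsewhere. */
#if 0
static void example__tick ( Thr* me, VTS* vts )
{
   VTS* out = VTS__new( "example.tick", 1 + vts->usedTS );
   VTS__tick( out, me, vts );
   tl_assert(VTS__indexAt_SLOW( out, me )
             == 1 + VTS__indexAt_SLOW( vts, me ));
   VTS__delete( out );
}
#endif
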

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}

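/* A hedged sketch (#if 0'd out): the join is the per-thread maximum,
   which is why it is commutative and idempotent.  Sizing follows the
   same conservative useda+usedb rule used above; the scenario is
   hypothetical. */
#if 0
static void example__join_is_pointwise_max ( VTS* a, VTS* b, Thr* t )
{
   VTS* ab = VTS__new( "example.join", a->usedTS + b->usedTS );
   VTS__join( ab, a, b );
   ULong ta = VTS__indexAt_SLOW( a, t );
   ULong tb = VTS__indexAt_SLOW( b, t );
   tl_assert(VTS__indexAt_SLOW( ab, t ) == (ta > tb ? ta : tb));
   VTS__delete( ab );
}
#endif
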

/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}

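/* A hedged sketch (#if 0'd out) of how this predicate is meant to be
   read: 'a' happens-before-or-equals 'b' exactly when every
   per-thread timestamp in 'a' is <= the corresponding one in 'b',
   i.e. when VTS__cmpLEQ returns zero.  The wrapper name is made up. */
#if 0
static Bool example__hb_leq ( VTS* a, VTS* b )
{
   return 0 == VTS__cmpLEQ( a, b );
}
#endif
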

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)  This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across likely
      differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
   Word i, n;
   tl_assert(vts);

   VG_(printf)("[");
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      const ScalarTS *st = &vts->ts[i];
      VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
   }
   VG_(printf)("]");
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ:  tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that.). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}

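/* A hedged sketch (#if 0'd out) of the interning guarantee: two
   structurally equal candidates come back as the same canonical
   pointer, so equality of interned VTSs reduces to pointer equality. */
#if 0
static void example__interning ( VTS* cand1, VTS* cand2 )
{
   /* assumes cand1 and cand2 are structurally equal and both have
      id == VtsID_INVALID, as the function requires */
   VTS *r1, *r2;
   vts_set__find__or__clone_and_add( &r1, cand1 );
   vts_set__find__or__clone_and_add( &r2, cand2 );
   tl_assert(r1 == r2);
}
#endif
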
2737
2738/////////////////////////////////////////////////////////
2739// //
2740// VTS table //
2741// //
2742/////////////////////////////////////////////////////////
2743
2744static void VtsID__invalidate_caches ( void ); /* fwds */
2745
2746/* A type to hold VTS table entries. Invariants:
2747 If .vts == NULL, then this entry is not in use, so:
2748 - .rc == 0
2749 - this entry is on the freelist (unfortunately, does not imply
philippea1ac2f42015-05-01 17:12:00 +00002750 any constraints on value for u.freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002751 If .vts != NULL, then this entry is in use:
2752 - .vts is findable in vts_set
2753 - .vts->id == this entry number
2754 - no specific value for .rc (even 0 is OK)
philippea1ac2f42015-05-01 17:12:00 +00002755 - this entry is not on freelist, so u.freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002756*/
2757typedef
2758 struct {
2759 VTS* vts; /* vts, in vts_set */
2760 UWord rc; /* reference count - enough for entire aspace */
philippea1ac2f42015-05-01 17:12:00 +00002761 union {
2762 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2763 VtsID remap; /* used only during pruning, for used entries */
2764 } u;
2765 /* u.freelink only used when vts == NULL,
2766 u.remap only used when vts != NULL, during pruning. */
sewardjf98e1c02008-10-25 16:22:41 +00002767 }
2768 VtsTE;
2769
2770/* The VTS table. */
2771static XArray* /* of VtsTE */ vts_tab = NULL;
2772
2773/* An index into the VTS table, indicating the start of the list of
2774 free (available for use) entries. If the list is empty, this is
2775 VtsID_INVALID. */
2776static VtsID vts_tab_freelist = VtsID_INVALID;
2777
2778/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2779 vts_tab equals or exceeds this size. After GC, the value here is
2780 set appropriately so as to check for the next GC point. */
2781static Word vts_next_GC_at = 1000;
2782
2783static void vts_tab_init ( void )
2784{
florian91ed8cc2014-09-15 18:50:17 +00002785 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2786 HG_(free), sizeof(VtsTE) );
2787 vts_tab_freelist = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002788}
2789
2790/* Add ii to the free list, checking that it looks out-of-use. */
2791static void add_to_free_list ( VtsID ii )
2792{
2793 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2794 tl_assert(ie->vts == NULL);
2795 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002796 tl_assert(ie->u.freelink == VtsID_INVALID);
2797 ie->u.freelink = vts_tab_freelist;
sewardjf98e1c02008-10-25 16:22:41 +00002798 vts_tab_freelist = ii;
2799}
2800
2801/* Get an entry from the free list. This will return VtsID_INVALID if
2802 the free list is empty. */
2803static VtsID get_from_free_list ( void )
2804{
2805 VtsID ii;
2806 VtsTE* ie;
2807 if (vts_tab_freelist == VtsID_INVALID)
2808 return VtsID_INVALID;
2809 ii = vts_tab_freelist;
2810 ie = VG_(indexXA)( vts_tab, ii );
2811 tl_assert(ie->vts == NULL);
2812 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002813 vts_tab_freelist = ie->u.freelink;
sewardjf98e1c02008-10-25 16:22:41 +00002814 return ii;
2815}
2816
2817/* Produce a new VtsID that can be used, either by getting it from
2818 the freelist, or, if that is empty, by expanding vts_tab. */
2819static VtsID get_new_VtsID ( void )
2820{
2821 VtsID ii;
2822 VtsTE te;
2823 ii = get_from_free_list();
2824 if (ii != VtsID_INVALID)
2825 return ii;
2826 te.vts = NULL;
2827 te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002828 te.u.freelink = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002829 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2830 return ii;
2831}
2832
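/* A hedged sketch (#if 0'd out): the freelist is a LIFO chained
   through u.freelink, so the most recently retired VtsID is the next
   one handed out.  The preconditions mirror the asserts above; the
   scenario itself is hypothetical. */
#if 0
static void example__freelist_is_lifo ( VtsID ii )
{
   /* assumes entry ii has vts == NULL, rc == 0 and
      u.freelink == VtsID_INVALID, i.e. retired but not yet listed */
   add_to_free_list( ii );
   tl_assert(get_from_free_list() == ii);
}
#endif
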
2833
2834/* Indirect callback from lib_zsm. */
2835static void VtsID__rcinc ( VtsID ii )
2836{
2837 VtsTE* ie;
2838 /* VG_(indexXA) does a range check for us */
2839 ie = VG_(indexXA)( vts_tab, ii );
2840 tl_assert(ie->vts); /* else it's not in use */
2841 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2842 tl_assert(ie->vts->id == ii);
2843 ie->rc++;
2844}
2845
2846/* Indirect callback from lib_zsm. */
2847static void VtsID__rcdec ( VtsID ii )
2848{
2849 VtsTE* ie;
2850 /* VG_(indexXA) does a range check for us */
2851 ie = VG_(indexXA)( vts_tab, ii );
2852 tl_assert(ie->vts); /* else it's not in use */
2853 tl_assert(ie->rc > 0); /* else RC snafu */
2854 tl_assert(ie->vts->id == ii);
2855 ie->rc--;
2856}
2857
2858
sewardj7aa38a92011-02-27 23:04:12 +00002859/* Look up 'cand' in our collection of VTSs. If present, return the
2860 VtsID for the pre-existing version. If not present, clone it, add
2861 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2862 it, and return that. */
2863static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002864{
sewardj7aa38a92011-02-27 23:04:12 +00002865 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002866 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002867 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2868 tl_assert(in_tab);
2869 if (already_have) {
2870 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002871 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002872 tl_assert(in_tab->id != VtsID_INVALID);
2873 ie = VG_(indexXA)( vts_tab, in_tab->id );
2874 tl_assert(ie->vts == in_tab);
2875 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002876 } else {
2877 VtsID ii = get_new_VtsID();
2878 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002879 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002880 ie->rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002881 ie->u.freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002882 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002883 return ii;
2884 }
2885}
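
/* Since the candidate is cloned on the not-already-present path, a
   caller may pass a scratch VTS (in practice temp_max_sized_VTS) and
   reuse that buffer immediately afterwards.  A freshly added entry
   starts with rc == 0; references to it are counted subsequently via
   VtsID__rcinc. */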

static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->u.remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}
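
/* Note that only C(Rmin,Wmin) shadow values embed VtsIDs;
   SVal_NOACCESS and SVal_INVALID carry none, which is why non-C
   values pass through the above unchanged. */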

/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   stats__vts_tab_GC++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%lu  old size %lu  live %lu  (%2llu%%)\n",
                   stats__vts_tab_GC,
                   nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;
   /* No need to do pruning if no thread died since the last pruning,
      since then no VtsTE can be pruned. */
   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
      return;

   /* ---------- BEGIN VTS PRUNING ---------- */
   /* Sort and check the very dead threads that died since the last
      pruning.  Sorting is used for the check and so that we can
      quickly look up the dead-thread entries as we work through the
      VTSs. */
   verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);

   /* We will run through the old table, and create a new table and
      set, at the same time setting the u.remap entries in the old
      table to point to the new entries.  Then, visit every VtsID in
      the system, and replace all of them with new ones, using the
      u.remap entries in the old table.  Finally, we can delete the old
      table and set. */

   XArray* /* of VtsTE */ new_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
                    HG_(free), sizeof(VtsTE) );

   /* WordFM VTS* void */
   WordFM* new_set
      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
                    HG_(free),
                    (Word(*)(UWord,UWord))VTS__cmp_structural );

   /* Visit each old VTS.  For each one:

      * make a pruned version

      * search new_set for the pruned version, yielding either
        Nothing (not present) or the new VtsID for it.

      * if not present, allocate a new VtsID for it, insert (pruned
        VTS, new VtsID) in the tree, and set
        remap_table[old VtsID] = new VtsID.

      * if present, set remap_table[old VtsID] = new VtsID, where
        new VtsID was determined by the tree lookup.  Then free up
        the clone.
   */

   UWord nBeforePruning = 0, nAfterPruning = 0;
   UWord nSTSsBefore = 0, nSTSsAfter = 0;
   VtsID new_VtsID_ctr = 0;

   for (i = 0; i < nTab; i++) {

      /* For each old VTS .. */
      VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
      VTS*   old_vts = old_te->vts;

      /* Skip it if not in use */
      if (old_te->rc == 0) {
         tl_assert(old_vts == NULL);
         continue;
      }
      tl_assert(old_te->u.remap == VtsID_INVALID);
      tl_assert(old_vts != NULL);
      tl_assert(old_vts->id == i);
      tl_assert(old_vts->ts != NULL);

      /* It is in use. Make a pruned version. */
      nBeforePruning++;
      nSTSsBefore += old_vts->usedTS;
      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
                                   old_vts, verydead_thread_table_not_pruned);
      tl_assert(new_vts->sizeTS == new_vts->usedTS);
      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
                == 0x0ddC0ffeeBadF00dULL);

      /* Get rid of the old VTS and the tree entry.  It's a bit more
         complex to incrementally delete the VTSs now than to nuke
         them all after we're done, but the upside is that we don't
         wind up temporarily storing potentially two complete copies
         of each VTS and hence spiking memory use. */
      UWord oldK = 0, oldV = 12345;
      Bool present = VG_(delFromFM)( vts_set,
                                     &oldK, &oldV, (UWord)old_vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(old_vts);
      old_te->vts = NULL;
      old_vts = NULL;

      /* NO MENTIONS of old_vts allowed beyond this point. */

      /* Ok, we have the pruned copy in new_vts.  See if a
         structurally identical version is already present in new_set.
         If so, delete the one we just made and move on; if not, add
         it. */
      VTS*  identical_version = NULL;
      UWord valW = 12345;
      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
                        (UWord)new_vts)) {
         // already have it
         tl_assert(valW == 0);
         tl_assert(identical_version != NULL);
         tl_assert(identical_version != new_vts);
         VTS__delete(new_vts);
         new_vts = identical_version;
         tl_assert(new_vts->id != VtsID_INVALID);
      } else {
         tl_assert(valW == 12345);
         tl_assert(identical_version == NULL);
         new_vts->id = new_VtsID_ctr++;
         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
         tl_assert(!b);
         VtsTE new_te;
         new_te.vts        = new_vts;
         new_te.rc         = 0;
         new_te.u.freelink = VtsID_INVALID;
         Word j = VG_(addToXA)( new_tab, &new_te );
         tl_assert(j <= i);
         tl_assert(j == new_VtsID_ctr - 1);
         // stats
         nAfterPruning++;
         nSTSsAfter += new_vts->usedTS;
      }
      old_te->u.remap = new_vts->id;

   } /* for (i = 0; i < nTab; i++) */

   /* Move very dead threads from verydead_thread_table_not_pruned to
      verydead_thread_table.  Sort and check verydead_thread_table
      to verify a thread was reported very dead only once. */
   {
      UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);

      for (i = 0; i < nBT; i++) {
         ThrID thrid =
            *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
         VG_(addToXA)( verydead_thread_table, &thrid );
      }
      verydead_thread_table_sort_and_check (verydead_thread_table);
      VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
   }

   /* At this point, we have:
      * the old VTS table, with its u.remap entries set,
        and with all .vts == NULL.
      * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted as we worked
        through the old table.
      * the new VTS table, with all .rc == 0, all u.freelink and u.remap
        == VtsID_INVALID.
      * the new VTS tree.
   */
   tl_assert( VG_(sizeFM)(vts_set) == 0 );

   /* Now actually apply the mapping. */
   /* Visit all the VtsIDs in the entire system.  Where do we expect
      to find them?
      (a) in shadow memory -- the LineZs and LineFs
      (b) in our collection of struct _Thrs.
      (c) in our collection of struct _SOs.
      Nowhere else, AFAICS.  Not in the zsm cache, because that just
      got invalidated.

      Using the u.remap fields in vts_tab, map each old VtsID to a new
      VtsID.  For each old VtsID, dec its rc; and for each new one,
      inc it.  This sets up the new refcounts, and it also gives a
      cheap sanity check of the old ones: all old refcounts should be
      zero after this operation.
   */

   /* Do the mappings for (a) above: iterate over the Primary shadow
      mem map (WordFM Addr SecMap*). */
   UWord secmapW = 0;
   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         if (lineZ->dict[0] != SVal_INVALID) {
            for (j = 0; j < 4; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
         } else {
            LineF* lineF = SVal2Ptr (lineZ->dict[1]);
            for (j = 0; j < N_LINE_ARANGE; j++)
               remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
         }
      }
   }
   VG_(doneIterFM)( map_shmem );

   /* Do the mappings for (b) above: visit our collection of struct
      _Thrs. */
   Thread* hgthread = get_admin_threads();
   tl_assert(hgthread);
   while (hgthread) {
      Thr* hbthr = hgthread->hbthr;
      tl_assert(hbthr);
      /* Threads that are listed in the prunable set have their viR
         and viW set to VtsID_INVALID, so we can't mess with them. */
      if (hbthr->llexit_done && hbthr->joinedwith_done) {
         tl_assert(hbthr->viR == VtsID_INVALID);
         tl_assert(hbthr->viW == VtsID_INVALID);
         hgthread = hgthread->admin;
         continue;
      }
      remap_VtsID( vts_tab, new_tab, &hbthr->viR );
      remap_VtsID( vts_tab, new_tab, &hbthr->viW );
      hgthread = hgthread->admin;
   }

   /* Do the mappings for (c) above: visit the struct _SOs. */
   SO* so = admin_SO;
   while (so) {
      if (so->viR != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viR );
      if (so->viW != VtsID_INVALID)
         remap_VtsID( vts_tab, new_tab, &so->viW );
      so = so->admin_next;
   }

   /* So, we're nearly done (with this incredibly complex operation).
      Check the refcounts for the old VtsIDs all fell to zero, as
      expected.  Any failure is serious. */
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts == NULL);
      /* This is the assert proper.  Note we're also asserting
         zeroness for old entries which are unmapped.  That's OK. */
      tl_assert(te->rc == 0);
   }

   /* Install the new table and set. */
   VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
   vts_set = new_set;
   VG_(deleteXA)( vts_tab );
   vts_tab = new_tab;

   /* The freelist of vts_tab entries is empty now, because we've
      compacted all of the live entries at the low end of the
      table. */
   vts_tab_freelist = VtsID_INVALID;

   /* Sanity check vts_set and vts_tab. */

   /* Because all the live entries got slid down to the bottom of vts_tab: */
   tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));

   /* Assert that the vts_tab and vts_set entries point at each other
      in the required way */
   UWord wordK = 0, wordV = 0;
   VG_(initIterFM)( vts_set );
   while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
      tl_assert(wordK != 0);
      tl_assert(wordV == 0);
      VTS* vts = (VTS*)wordK;
      tl_assert(vts->id != VtsID_INVALID);
      VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
      tl_assert(te->vts == vts);
   }
   VG_(doneIterFM)( vts_set );

   /* Also iterate over the table, and check each entry is
      plausible. */
   nTab = VG_(sizeXA)( vts_tab );
   for (i = 0; i < nTab; i++) {
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      tl_assert(te->vts);
      tl_assert(te->vts->id == i);
      tl_assert(te->rc > 0); /* 'cos we just GC'd */
      tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
      /* value of te->u.remap not relevant */
   }

   /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
   stats__vts_pruning++;
   if (VG_(clo_stats)) {
      tl_assert(nTab > 0);
      VG_(message)(
         Vg_DebugMsg,
         "libhb: VTS PR: #%lu  before %lu (avg sz %lu)  "
            "after %lu (avg sz %lu)\n",
         stats__vts_pruning,
         nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
         nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
      );
   }
   /* ---------- END VTS PRUNING ---------- */
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Vts IDs                       //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A temporary, max-sized VTS which is used as the first argument of
   VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];
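
/* Both caches are direct-mapped: a (vi1,vi2) pair hashes to exactly
   one slot, and a colliding pair simply overwrites whatever was
   there.  This keeps lookup and invalidation O(1) at the cost of
   occasionally recomputing a displaced result. */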

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( vts );
}

/* compute partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
   any, really) element in vi1 which is pointwise greater-than the
   corresponding element in vi2.  If no such element exists, return
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS  *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Filters                       //
//                                                     //
/////////////////////////////////////////////////////////

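/* Filter state encoding: each FiLine covers FI_LINE_SZB (32) bytes
   using two state bits per byte, packed into four UShorts.  Within
   each 2-bit pair the upper bit records that a read of that byte may
   be skipped, and the full pair records the same for writes; a write
   therefore subsumes a read, which is why the write masks below cover
   both bits of each pair while the read masks cover only the upper
   ones. */
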
/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   UWord i;
   if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

/* Only used to verify the fast Filter__clear_range */
__attribute__((unused))
static void Filter__clear_range_SLOW ( Filter* fi, Addr a, UWord len )
{
   tl_assert (CHECK_ZSM);

   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
#  if CHECK_ZSM > 0
   /* We check the below more complex algorithm against the simple one.
      This check is very expensive: we first do it the slow way on a
      copy of the data, then do it the fast way.  On RETURN, we check
      the two results are equal. */
   Filter fi_check = *fi;
   Filter__clear_range_SLOW(&fi_check, a, len);
#  define RETURN goto check_and_return
#  else
#  define RETURN return
#  endif

   Addr begtag = FI_GET_TAG(a);       /* tag of range begin */

   Addr end = a + len - 1;
   Addr endtag = FI_GET_TAG(end);     /* tag of range end. */

   UWord rlen = len;                  /* remaining length to clear */

   Addr    c = a;                      /* Current position we are clearing. */
   UWord   clineno = FI_GET_LINENO(c); /* Current lineno we are clearing */
   FiLine* cline;                      /* Current line we are clearing */
   UWord   cloff;                      /* Current offset in line we are
                                          clearing, when clearing partial
                                          lines. */

   UShort u16;

   STATIC_ASSERT (FI_LINE_SZB == 32);
   // Below assumes filter lines are 32 bytes

   if (LIKELY(fi->tags[clineno] == begtag)) {
      /* LIKELY for the heavy caller VG_(unknown_SP_update). */
      /* First filter line matches begtag.
         If c is not at the filter line begin, the below will clear
         the filter line bytes starting from c. */
      cline = &fi->lines[clineno];
      cloff = (c - begtag) / 8;

      /* First the byte(s) needed to reach 8-alignment */
      if (UNLIKELY(!VG_IS_8_ALIGNED(c))) {
         /* hiB is the nr of bytes (higher addresses) from c to reach
            8-alignment. */
         UWord hiB = 8 - (c & 7);
         /* Compute 2-bit/byte mask representing hiB bytes [c..c+hiB[
            mask is  C000, F000, FC00, FF00, FFC0, FFF0 or FFFC for the byte
            range    7..7  6..7  5..7  4..7  3..7  2..7  1..7 */
         UShort mask = 0xFFFF << (16 - 2*hiB);

         u16  = cline->u16s[cloff];
         if (LIKELY(rlen >= hiB)) {
            cline->u16s[cloff] = u16 & ~mask; /* clear all hiB bytes from c */
            rlen -= hiB;
            c += hiB;
            cloff += 1;
         } else {
            /* Only have the bits for rlen bytes. */
            mask = mask & ~(0xFFFF << (16 - 2*(hiB-rlen)));
            cline->u16s[cloff] = u16 & ~mask; /* clear rlen bytes from c. */
            RETURN;  // We have cleared all we can.
         }
      }
      /* c is now 8 aligned.  Clear by 8 aligned bytes,
         till c is filter-line aligned */
      while (!VG_IS_32_ALIGNED(c) && rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
   } else {
      c = begtag + FI_LINE_SZB;
      if (c > end)
         RETURN;   // We have cleared all we can.
      rlen -= c - a;
   }
   // We have changed c, so re-establish clineno.
   clineno = FI_GET_LINENO(c);

   if (rlen >= FI_LINE_SZB) {
      /* Here, c is filter line-aligned.  Clear all the full lines that
         overlap with the range starting at c. */
      UWord nfull = rlen / FI_LINE_SZB;
      UWord full_len = nfull * FI_LINE_SZB;
      rlen -= full_len;
      if (nfull > FI_NUM_LINES)
         nfull = FI_NUM_LINES; // no need to check the same entry several times.

      for (UWord n = 0; n < nfull; n++) {
         if (UNLIKELY(address_in_range(fi->tags[clineno], c, full_len))) {
            cline = &fi->lines[clineno];
            cline->u16s[0] = 0;
            cline->u16s[1] = 0;
            cline->u16s[2] = 0;
            cline->u16s[3] = 0;
            STATIC_ASSERT (4 == sizeof(cline->u16s)/sizeof(cline->u16s[0]));
         }
         clineno++;
         if (UNLIKELY(clineno == FI_NUM_LINES))
            clineno = 0;
      }

      c += full_len;
      clineno = FI_GET_LINENO(c);
   }

   if (CHECK_ZSM) {
      tl_assert(VG_IS_8_ALIGNED(c));
      tl_assert(clineno == FI_GET_LINENO(c));
   }

   /* Do the last filter line, if it was not cleared as a full filter line */
   if (UNLIKELY(rlen > 0) && fi->tags[clineno] == endtag) {
      cline = &fi->lines[clineno];
      cloff = (c - endtag) / 8;
      if (CHECK_ZSM) tl_assert(FI_GET_TAG(c) == endtag);

      /* c is 8 aligned.  Clear by 8 aligned bytes, till we have less than
         8 bytes. */
      while (rlen >= 8) {
         cline->u16s[cloff] = 0;
         c += 8;
         rlen -= 8;
         cloff += 1;
      }
      /* Then the remaining byte(s) */
      if (rlen > 0) {
         /* nr of bytes from c to reach end. */
         UWord loB = rlen;
         /* Compute mask representing loB bytes [c..c+loB[ :
            mask is  0003, 000F, 003F, 00FF, 03FF, 0FFF or 3FFF */
         UShort mask = 0xFFFF >> (16 - 2*loB);

         u16 = cline->u16s[cloff];
         cline->u16s[cloff] = u16 & ~mask; /* clear all loB bytes from c */
      }
   }

#  if CHECK_ZSM > 0
  check_and_return:
   tl_assert (VG_(memcmp)(&fi_check, fi, sizeof(fi_check)) == 0);
#  endif
#  undef RETURN
}

/* ------ Read handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAAAA;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 4 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xA << (2 * (a & 6));
      /* mask is A000, 0A00, 00A0 or 000A */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 2 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x2 << (2 * (a & 7));
      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 1 x R bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFFFF;
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0xF << (2 * (a & 6));
      /* mask is F000, 0F00, 00F0 or 000F */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
      FiLine* line   = &fi->lines[lineno];
      UWord   loff   = (a - atag) / 8;
      UShort  mask   = 0x3 << (2 * (a & 7));
      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
      if (LIKELY( fi->tags[lineno] == atag )) {
         /* hit.  check line and update. */
         UShort u16  = line->u16s[loff];
         Bool   ok   = (u16 & mask) == mask; /* 1 x R & W bits set? */
         line->u16s[loff] = u16 | mask; /* set them */
         return ok;
      } else {
         /* miss.  nuke existing line and re-use it. */
         UWord i;
         fi->tags[lineno] = atag;
         for (i = 0; i < FI_LINE_SZB / 8; i++)
            line->u16s[i] = 0;
         line->u16s[loff] = mask;
         return False;
      }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//                       Threads                       //
//                                                     //
/////////////////////////////////////////////////////////

/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, slot (1) is for 1025, etc. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
                                       const ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
//                    Shadow Values                    //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}
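
/* For example, SVal__mkC(0x123, 0x456) yields 0x0000012300000456: the
   Rmin VtsID sits in the upper 32 bits and the Wmin VtsID in the
   lower 32.  Because valid VtsIDs fit in 30 bits, the tag bits 63..62
   remain 00, so SVal__isC holds and the two accessors invert the
   packing exactly. */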

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
__attribute__((unused))
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static inline void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}

static inline void *SVal2Ptr (SVal s)
{
   return (void*)(UWord)s;
}

static inline SVal Ptr2SVal (void* ptr)
{
   return (SVal)(UWord)ptr;
}



/////////////////////////////////////////////////////////
//                                                     //
//                  Change-event map2                  //
//                                                     //
/////////////////////////////////////////////////////////

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef in the SparseWA is also on a doubly
      linked list maintaining the order in which the OldRefs were most
      recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is a MTF-array.  If a binding
      falls off the end, that's too bad -- we will lose info about
      that triple's access to this location.

      We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
      Then we do exact LRU discarding.  For each discarded OldRef we
      must of course decrement the reference count on all the RCECs it
      refers to, in order that entries from (1) eventually get
      discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is caused by the fixed size of this array.  I
   suspect for most OldRefs, only a few entries are used, but for a
   minority of cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a PoolAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/
4228
4229
4230static UWord stats__ctxt_rcdec1 = 0;
4231static UWord stats__ctxt_rcdec2 = 0;
4232static UWord stats__ctxt_rcdec3 = 0;
4233static UWord stats__ctxt_rcdec_calls = 0;
4234static UWord stats__ctxt_rcdec_discards = 0;
4235static UWord stats__ctxt_rcdec1_eq = 0;
4236
4237static UWord stats__ctxt_tab_curr = 0;
4238static UWord stats__ctxt_tab_max = 0;
4239
4240static UWord stats__ctxt_tab_qs = 0;
4241static UWord stats__ctxt_tab_cmps = 0;
4242
4243
4244///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00004245//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00004246///
4247
4248#define N_FRAMES 8
4249
4250// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4251#define RCEC_MAGIC 0xab88abb2UL
4252
4253//#define N_RCEC_TAB 98317 /* prime */
4254#define N_RCEC_TAB 196613 /* prime */
4255
4256typedef
4257 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00004258 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004259 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004260 UWord rc;
4261 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004262 UWord frames_hash; /* hash of all the frames */
4263 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004264 }
4265 RCEC;
4266
philippecabdbb52015-04-20 21:33:16 +00004267//////////// BEGIN RCEC pool allocator
4268static PoolAlloc* rcec_pool_allocator;
4269static RCEC* alloc_RCEC ( void ) {
4270 return VG_(allocEltPA) ( rcec_pool_allocator );
4271}
4272
4273static void free_RCEC ( RCEC* rcec ) {
4274 tl_assert(rcec->magic == RCEC_MAGIC);
4275 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4276}
4277//////////// END RCEC pool allocator
4278
sewardjf98e1c02008-10-25 16:22:41 +00004279static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4280
philippecabdbb52015-04-20 21:33:16 +00004281/* Count of allocated RCEC having ref count > 0 */
4282static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004283
4284/* Gives an arbitrary total order on RCEC .frames fields */
4285static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4286 Word i;
4287 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4288 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004289 if (ec1->frames_hash < ec2->frames_hash) return -1;
4290 if (ec1->frames_hash > ec2->frames_hash) return 1;
4291 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004292 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004293 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004294 }
4295 return 0;
4296}
4297
4298
4299/* Dec the ref of this RCEC. */
4300static void ctxt__rcdec ( RCEC* ec )
4301{
4302 stats__ctxt_rcdec_calls++;
4303 tl_assert(ec && ec->magic == RCEC_MAGIC);
4304 tl_assert(ec->rc > 0);
4305 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00004306 if (ec->rc == 0)
4307 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00004308}
4309
4310static void ctxt__rcinc ( RCEC* ec )
4311{
4312 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00004313 if (ec->rc == 0)
4314 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00004315 ec->rc++;
4316}
4317
4318
4319/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4320 move it one step closer the the front of the list, so as to make
4321 subsequent searches for it cheaper. */
4322static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4323{
4324 RCEC *ec0, *ec1, *ec2;
4325 if (ec == *headp)
4326 tl_assert(0); /* already at head of list */
4327 tl_assert(ec != NULL);
4328 ec0 = *headp;
4329 ec1 = NULL;
4330 ec2 = NULL;
4331 while (True) {
4332 if (ec0 == NULL || ec0 == ec) break;
4333 ec2 = ec1;
4334 ec1 = ec0;
4335 ec0 = ec0->next;
4336 }
4337 tl_assert(ec0 == ec);
4338 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4339 RCEC* tmp;
4340 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4341 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4342 closer to the start of the list. */
4343 tl_assert(ec2->next == ec1);
4344 tl_assert(ec1->next == ec0);
4345 tmp = ec0->next;
4346 ec2->next = ec0;
4347 ec0->next = ec1;
4348 ec1->next = tmp;
4349 }
4350 else
4351 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4352 /* it's second in the list. */
4353 tl_assert(*headp == ec1);
4354 tl_assert(ec1->next == ec0);
4355 ec1->next = ec0->next;
4356 ec0->next = ec1;
4357 *headp = ec0;
4358 }
4359}


/* Find the given RCEC in the tree, and return a pointer to it.  Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy.  The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}

static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);
   w = (w << n) | (w >> (bpw-n));
   return w;
}

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc    = 0;
   example.rcX   = 0;
   example.next  = NULL;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}
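
/* A minimal usage sketch (the assumed typical call pattern, not a
   fixed API contract): get_RCEC may return either a fresh node with
   .rc == 0 or an existing node, so a caller wanting to retain the
   result must bump the count itself, and drop it again when the
   reference is overwritten or discarded:

      RCEC* rcec = get_RCEC( thr );   // capture current stack
      ctxt__rcinc( rcec );            // now safe from do_RCEC_GC
      // ... hold the reference ...
      ctxt__rcdec( rcec );            // may become GC-able again

   Zero-count nodes are reclaimed only at the next do_RCEC_GC, so a
   transiently zero count in between is legal. */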

///////////////////////////////////////////////////////
//// Part (2):
///  A SparseWA guest-addr -> OldRef, that refers to (1)
///

/* Records an access: a thread, a context (size & writeness) and the
   number of held locks.  The size (1,2,4,8) is encoded as 00 = 1, 01 =
   2, 10 = 4, 11 = 8.
*/
typedef
   struct {
      RCEC*     rcec;
      WordSetID locksHeldW;
      UInt      thrid  : SCALARTS_N_THRBITS;
      UInt      szLg2B : 2;
      UInt      isW    : 1;
   }
   Thr_n_RCEC;
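
/* For example (illustration only): an aligned 8-byte write by thread
   #3 would be stored with thrid == 3, szLg2B == 3 (binary 11) and
   isW == 1, and the size is recovered later with

      SizeT szB = 1 << szLg2B;   // here: 1 << 3 == 8

   as done in libhb_event_map_lookup below. */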

#define N_OLDREF_ACCS 5

typedef
   struct OldRef {
      struct OldRef *prev; // to refs older than this one
      struct OldRef *next; // to refs newer than this one
      Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
      /* unused slots in this array have .thrid == 0, which is invalid */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;
/* We need ga in OldRef in order to remove OldRef from the sparsewa
   by key (i.e. ga) when re-using the LRU OldRef. */

//////////// BEGIN OldRef pool allocator
static PoolAlloc* oldref_pool_allocator;
// Note: We only allocate elements in this pool allocator, we never free them.
// We stop allocating elements at HG_(clo_conflict_cache_size).
//////////// END OldRef pool allocator

static OldRef mru; 
static OldRef lru; 
// A doubly linked list, chaining all OldRefs in MRU/LRU order.
// mru/lru are sentinel nodes.
// Whenever an oldref is re-used, its position is changed to be the most
// recently used (i.e. pointed to by mru.prev).
// When a new oldref is needed, it is allocated from the pool
// if we have not yet reached --conflict-cache-size.
// Otherwise, if all oldrefs have already been allocated,
// the least recently used one (i.e. pointed to by lru.next) is re-used.
// When an OldRef is used, it is moved to be the most recently used entry
// (i.e. pointed to by mru.prev).

// Removes r from the doubly linked list.
// Note: we do not need to test for special cases such as
// NULL next or prev pointers, because we have sentinel nodes
// at both ends of the list.  So, a node is always forward and
// backward linked.
static inline void OldRef_unchain(OldRef *r)
{
   r->next->prev = r->prev;
   r->prev->next = r->next;
}

// Insert new as the newest OldRef.
// Similarly to OldRef_unchain, there is no need to test for NULL
// pointers, as e.g. mru.prev is always guaranteed to point
// to a non-NULL node (lru when the list is empty).
static inline void OldRef_newest(OldRef *new)
{
   new->next = &mru;
   new->prev = mru.prev;
   mru.prev = new;
   new->prev->next = new;
}
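
/* Sketch of the recency-update idiom (illustration only; the real
   call sites are alloc_or_reuse_OldRef and event_map_bind below):
   touching an existing OldRef is just an unchain followed by a
   re-insertion at the MRU end, which keeps the chain exactly ordered
   by recency of use:

      OldRef_unchain(ref);   // drop from current position, O(1)
      OldRef_newest(ref);    // re-insert as mru.prev, O(1)

   Both steps are constant-time thanks to the two sentinels. */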

static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
/* Note: the number of refs in the oldrefTree will always equal the
   number of elements that were allocated from the OldRef pool
   allocator, as we never free an OldRef: we just re-use them. */


/* Allocates a new OldRef, or re-uses the LRU one if all allowed
   OldRefs have already been allocated. */
static OldRef* alloc_or_reuse_OldRef ( void )
{
   if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
      oldrefTreeN++;
      return VG_(allocEltPA) ( oldref_pool_allocator );
   } else {
      Bool  b;
      UWord valW;
      OldRef *oldref = lru.next;

      OldRef_unchain(oldref);
      b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
      tl_assert(b);
      tl_assert (oldref == (OldRef*)valW);

      for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aRef) {
            tl_assert(aThrID != 0);
            stats__ctxt_rcdec3++;
            ctxt__rcdec( aRef );
         } else {
            tl_assert(aThrID == 0);
         }
      }
      return oldref;
   }
}


inline static UInt min_UInt ( UInt a, UInt b ) {
   return a < b ? a : b;
}

/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
/* Copied from exp-ptrcheck/sg_main.c */
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
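
/* Worked examples (illustration only):

      cmp_nonempty_intervals(0x1000,4, 0x1004,4) == -1  (disjoint, below)
      cmp_nonempty_intervals(0x1006,2, 0x1000,4) ==  1  (disjoint, above)
      cmp_nonempty_intervals(0x1000,4, 0x1002,8) ==  0  (overlap)

   Note 0 means "any overlap at all", not equality of the intervals;
   that is exactly what the conflict search below needs. */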

static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
{
   OldRef* ref;
   RCEC*   rcec;
   Word    i, j;
   UWord   valW;
   Bool    b;

   tl_assert(thr);
   ThrID thrid = thr->thrid;
   tl_assert(thrid != 0); /* zero is used to denote an empty slot. */

   WordSetID locksHeldW = thr->hgthread->locksetW;

   rcec = get_RCEC( thr );
   ctxt__rcinc(rcec);

   UInt szLg2B = 0;
   switch (szB) {
      /* This doesn't look particularly branch-predictor friendly. */
      case 1:  szLg2B = 0; break;
      case 2:  szLg2B = 1; break;
      case 4:  szLg2B = 2; break;
      case 8:  szLg2B = 3; break;
      default: tl_assert(0);
   }

   /* Look in the map to see if we already have a record for this
      address. */
   b = VG_(lookupSWA)( oldrefTree, &valW, a );

   if (b) {

      /* We already have a record for this address.  We now need to
         see if we have a stack trace pertaining to this (thrid, R/W,
         size) triple. */
      ref = (OldRef*)valW;

      tl_assert (ref->ga == a);

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thrid != thrid)
            continue;
         if (ref->accs[i].szLg2B != szLg2B)
            continue;
         if (ref->accs[i].isW != (UInt)(isW & 1))
            continue;
         /* else we have a match, so stop looking. */
         break;
      }

      if (i < N_OLDREF_ACCS) {
         /* thread 'thr' has an entry at index 'i'.  Update its RCEC. */
         if (i > 0) {
            Thr_n_RCEC tmp = ref->accs[i-1];
            ref->accs[i-1] = ref->accs[i];
            ref->accs[i] = tmp;
            i--;
         }
         if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
         stats__ctxt_rcdec1++;
         ctxt__rcdec( ref->accs[i].rcec );
         tl_assert(ref->accs[i].thrid == thrid);
         /* Update the RCEC and the W-held lockset. */
         ref->accs[i].rcec       = rcec;
         ref->accs[i].locksHeldW = locksHeldW;
      } else {
         /* No entry for this (thread, R/W, size, nWHeld) quad.
            Shuffle all of them down one slot, and put the new entry
            at the start of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
            if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
               VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
            ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
         }
         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
            ref->accs[j] = ref->accs[j-1];
         ref->accs[0].thrid      = thrid;
         ref->accs[0].szLg2B     = szLg2B;
         ref->accs[0].isW        = (UInt)(isW & 1);
         ref->accs[0].locksHeldW = locksHeldW;
         ref->accs[0].rcec       = rcec;
         /* thrid==0 is used to signify an empty slot, so we can't
            add zero thrid (such a ThrID is invalid anyway). */
         /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
      }

      OldRef_unchain(ref);
      OldRef_newest(ref);

   } else {

      /* We don't have a record for this address.  Create a new one. */
      ref = alloc_or_reuse_OldRef();
      ref->ga = a;
      ref->accs[0].thrid      = thrid;
      ref->accs[0].szLg2B     = szLg2B;
      ref->accs[0].isW        = (UInt)(isW & 1);
      ref->accs[0].locksHeldW = locksHeldW;
      ref->accs[0].rcec       = rcec;

      /* thrid==0 is used to signify an empty slot, so we can't
         add zero thrid (such a ThrID is invalid anyway). */
      /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */

      /* Clear out the rest of the entries */
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].rcec       = NULL;
         ref->accs[j].thrid      = 0;
         ref->accs[j].szLg2B     = 0;
         ref->accs[j].isW        = 0;
         ref->accs[j].locksHeldW = 0;
      }
      VG_(addToSWA)( oldrefTree, a, (UWord)ref );
      OldRef_newest (ref);
   }
}


/* Extract info from the conflicting-access machinery. */
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
                              /*OUT*/Thr**        resThr,
                              /*OUT*/SizeT*       resSzB,
                              /*OUT*/Bool*        resIsW,
                              /*OUT*/WordSetID*   locksHeldW,
                              Thr* thr, Addr a, SizeT szB, Bool isW )
{
   Word    i, j;
   OldRef* ref;
   UWord   valW;
   Bool    b;

   ThrID     cand_thrid;
   RCEC*     cand_rcec;
   Bool      cand_isW;
   SizeT     cand_szB;
   WordSetID cand_locksHeldW;
   Addr      cand_a;

   Addr toCheck[15];
   Int  nToCheck = 0;

   tl_assert(thr);
   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);

   ThrID thrid = thr->thrid;

   toCheck[nToCheck++] = a;
   for (i = -7; i < (Word)szB; i++) {
      if (i != 0)
         toCheck[nToCheck++] = a + i;
   }
   tl_assert(nToCheck <= 15);

   /* Now see if we can find a suitable matching event for
      any of the addresses in toCheck[0 .. nToCheck-1]. */
   for (j = 0; j < nToCheck; j++) {

      cand_a = toCheck[j];
      //      VG_(printf)("test %ld %p\n", j, cand_a);

      b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
      if (!b)
         continue;

      ref = (OldRef*)valW;
      tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */

      cand_thrid      = 0; /* invalid; see comments in event_map_bind */
      cand_rcec       = NULL;
      cand_isW        = False;
      cand_szB        = 0;
      cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         Thr_n_RCEC* cand = &ref->accs[i];
         cand_rcec       = cand->rcec;
         cand_thrid      = cand->thrid;
         cand_isW        = (Bool)cand->isW;
         cand_szB        = 1 << cand->szLg2B;
         cand_locksHeldW = cand->locksHeldW;

         if (cand_thrid == 0)
            /* This slot isn't in use.  Ignore it. */
            continue;

         if (cand_thrid == thrid)
            /* This is an access by the same thread, but we're only
               interested in accesses from other threads.  Ignore. */
            continue;

         if ((!cand_isW) && (!isW))
            /* We don't want to report a read racing against another
               read; that's stupid.  So in this case move on. */
            continue;

         if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
            /* No overlap with the access we're asking about.  Ignore. */
            continue;

         /* We have a match.  Stop searching. */
         break;
      }

      tl_assert(i >= 0 && i <= N_OLDREF_ACCS);

      if (i < N_OLDREF_ACCS) {
         Int n, maxNFrames;
         /* return with success */
         tl_assert(cand_thrid);
         tl_assert(cand_rcec);
         tl_assert(cand_rcec->magic == RCEC_MAGIC);
         tl_assert(cand_szB >= 1);
         /* Count how many non-zero frames we have. */
         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
         for (n = 0; n < maxNFrames; n++) {
            if (0 == cand_rcec->frames[n]) break;
         }
         *resEC      = VG_(make_ExeContext_from_StackTrace)
                          (cand_rcec->frames, n);
         *resThr     = Thr__from_ThrID(cand_thrid);
         *resSzB     = cand_szB;
         *resIsW     = cand_isW;
         *locksHeldW = cand_locksHeldW;
         return True;
      }

      /* consider next address in toCheck[] */
   } /* for (j = 0; j < nToCheck; j++) */

   /* really didn't find anything. */
   return False;
}
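
/* A hedged usage sketch (the caller shape is assumed, not mandated by
   this file): when a race on [a, a+szB) is about to be reported, the
   error machinery can ask for one conflicting old access like so:

      ExeContext* where;   Thr* confThr;  SizeT confSzB;
      Bool confIsW;        WordSetID confLocks;
      if (libhb_event_map_lookup( &where, &confThr, &confSzB,
                                  &confIsW, &confLocks,
                                  thr, a, szB, isW )) {
         // 'where' describes one conflicting access made by 'confThr'
      }

   A False return just means the bounded conflict cache no longer
   holds any overlapping access by another thread. */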

static void event_map_init ( void )
{
   Word i;

   /* Context (RCEC) pool allocator */
   rcec_pool_allocator = VG_(newPA) (
                             sizeof(RCEC),
                             1000 /* RCECs per pool */,
                             HG_(zalloc),
                             "libhb.event_map_init.1 (RCEC pools)",
                             HG_(free)
                          );

   /* Context table */
   tl_assert(!contextTab);
   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
                             N_RCEC_TAB * sizeof(RCEC*) );
   for (i = 0; i < N_RCEC_TAB; i++)
      contextTab[i] = NULL;

   /* Oldref pool allocator */
   oldref_pool_allocator = VG_(newPA)(
                               sizeof(OldRef),
                               1000 /* OldRefs per pool */,
                               HG_(zalloc),
                               "libhb.event_map_init.3 (OldRef pools)",
                               HG_(free)
                            );

   /* Oldref tree */
   tl_assert(!oldrefTree);
   oldrefTree = VG_(newSWA)(
                   HG_(zalloc),
                   "libhb.event_map_init.4 (oldref tree)",
                   HG_(free)
                );

   oldrefTreeN = 0;
   mru.prev = &lru;
   mru.next = NULL;
   lru.prev = NULL;
   lru.next = &mru;
   for (i = 0; i < N_OLDREF_ACCS; i++) {
      mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
                                  .locksHeldW = 0,
                                  .thrid = 0,
                                  .szLg2B = 0,
                                  .isW = 0};
      lru.accs[i] = mru.accs[i];
   }
}

static void event_map__check_reference_counts ( void )
{
   RCEC*   rcec;
   OldRef* oldref;
   Word    i;
   UWord   nEnts = 0;
   UWord   keyW, valW;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, incrementing the check ref
      counts as we go */
   VG_(initIterSWA)( oldrefTree );
   while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
      oldref = (OldRef*)valW;
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aThrID != 0) {
            tl_assert(aRef);
            tl_assert(aRef->magic == RCEC_MAGIC);
            aRef->rcX++;
         } else {
            tl_assert(!aRef);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}

__attribute__((noinline))
static void do_RCEC_GC ( void )
{
   UInt i;

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      VG_(message)(Vg_DebugMsg,
                   "libhb: RCEC GC: #%u  %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   ctr++,
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
   }
   tl_assert (stats__ctxt_tab_curr > RCEC_referenced);

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            *pp = p->next;
            free_RCEC(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_rcdec_discards++;
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p = p->next;
         }
      }
   }

   tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
}

/////////////////////////////////////////////////////////
//                                                     //
//                Core MSM                             //
//                                                     //
/////////////////////////////////////////////////////////

/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   Nov 08, and again after [...],
   June 09. */

static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;

/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,            Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw,  Cr `join` Kw)

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler, it avoids flooding the user with vast amounts of
   mostly useless information, and because the program is wrong if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we only show none if
   none exist).

   ---

   That requires the auxiliary proof that

      (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance.
*/

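/* A worked instance of the rules above (illustration only, using
   two-entry vector clocks [T1,T2]):

     location L starts with (Cr,Cw) = ([1,0],[1,0]) after a write by
     T1 at Kw[T1] == 1;

     T2, with Kw == [0,2], reads L:
        read rule:  (Cr, Cr `join` Kw) = ([1,0], [1,2])
     so Cw[T2] == 2 == T2's own scalar W-clock, as claimed above;

     T2 then writes L:
        write rule: (Cr `join` Kw, Cr `join` Kw) = ([1,2],[1,2]).

   Whether either access is reported as a race depends on the LEQ
   checks in msmcread/msmcwrite below, which compare the constraint
   against the accessing thread's clocks before joining. */
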
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
                         "confTym %llu found %d (%lu,%lu)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            if (!confThr->llexit_done)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}

static Bool is_sane_SVal_C ( SVal sv ) {
   Bool leq;
   if (!SVal__isC(sv)) return True;
   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   return leq;
}


/* Compute new state following a read */
static inline SVal msmcread ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcread++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviR  = acc_thr->viR;
      VtsID tviW  = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
      if (LIKELY(leq)) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           rmini, /* Cfailed */
                           tviR,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msmcread_change++;
      }
   }
   return svNew;
}


/* Compute new state following a write */
static inline SVal msmcwrite ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW <= tviR (invar on thread clocks)
         //    wmini <= tviW (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and    join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}
5276
5277
5278/////////////////////////////////////////////////////////
5279// //
5280// Apply core MSM to specific memory locations //
5281// //
5282/////////////////////////////////////////////////////////
5283
sewardj23f12002009-07-24 08:45:08 +00005284/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005285
sewardj23f12002009-07-24 08:45:08 +00005286static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005287 CacheLine* cl;
5288 UWord cloff, tno, toff;
5289 SVal svOld, svNew;
5290 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005291 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005292 cl = get_cacheline(a);
5293 cloff = get_cacheline_offset(a);
5294 tno = get_treeno(a);
5295 toff = get_tree_offset(a); /* == 0 .. 7 */
5296 descr = cl->descrs[tno];
5297 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5298 SVal* tree = &cl->svals[tno << 3];
5299 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005300 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005301 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5302 }
5303 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005304 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005305 if (CHECK_ZSM)
5306 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005307 cl->svals[cloff] = svNew;
5308}
5309
sewardj23f12002009-07-24 08:45:08 +00005310static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005311 CacheLine* cl;
5312 UWord cloff, tno, toff;
5313 SVal svOld, svNew;
5314 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005315 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005316 cl = get_cacheline(a);
5317 cloff = get_cacheline_offset(a);
5318 tno = get_treeno(a);
5319 toff = get_tree_offset(a); /* == 0 .. 7 */
5320 descr = cl->descrs[tno];
5321 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5322 SVal* tree = &cl->svals[tno << 3];
5323 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005324 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005325 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5326 }
5327 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005328 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005329 if (CHECK_ZSM)
5330 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005331 cl->svals[cloff] = svNew;
5332}
5333
sewardj23f12002009-07-24 08:45:08 +00005334/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005335
sewardj23f12002009-07-24 08:45:08 +00005336static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005337 CacheLine* cl;
5338 UWord cloff, tno, toff;
5339 SVal svOld, svNew;
5340 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005341 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005342 if (UNLIKELY(!aligned16(a))) goto slowcase;
5343 cl = get_cacheline(a);
5344 cloff = get_cacheline_offset(a);
5345 tno = get_treeno(a);
5346 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5347 descr = cl->descrs[tno];
5348 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5349 if (valid_value_is_below_me_16(descr, toff)) {
5350 goto slowcase;
5351 } else {
5352 SVal* tree = &cl->svals[tno << 3];
5353 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5354 }
sewardj8f5374e2008-12-07 11:40:17 +00005355 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005356 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5357 }
5358 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005359 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005360 if (CHECK_ZSM)
5361 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005362 cl->svals[cloff] = svNew;
5363 return;
5364 slowcase: /* misaligned, or must go further down the tree */
5365 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005366 zsm_sapply08__msmcread( thr, a + 0 );
5367 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005368}
5369
sewardj23f12002009-07-24 08:45:08 +00005370static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005371 CacheLine* cl;
5372 UWord cloff, tno, toff;
5373 SVal svOld, svNew;
5374 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005375 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005376 if (UNLIKELY(!aligned16(a))) goto slowcase;
5377 cl = get_cacheline(a);
5378 cloff = get_cacheline_offset(a);
5379 tno = get_treeno(a);
5380 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5381 descr = cl->descrs[tno];
5382 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5383 if (valid_value_is_below_me_16(descr, toff)) {
5384 goto slowcase;
5385 } else {
5386 SVal* tree = &cl->svals[tno << 3];
5387 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5388 }
sewardj8f5374e2008-12-07 11:40:17 +00005389 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005390 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5391 }
5392 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005393 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005394 if (CHECK_ZSM)
5395 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005396 cl->svals[cloff] = svNew;
5397 return;
5398 slowcase: /* misaligned, or must go further down the tree */
5399 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005400 zsm_sapply08__msmcwrite( thr, a + 0 );
5401 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005402}
5403
sewardj23f12002009-07-24 08:45:08 +00005404/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005405
sewardj23f12002009-07-24 08:45:08 +00005406static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005407 CacheLine* cl;
5408 UWord cloff, tno, toff;
5409 SVal svOld, svNew;
5410 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005411 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005412 if (UNLIKELY(!aligned32(a))) goto slowcase;
5413 cl = get_cacheline(a);
5414 cloff = get_cacheline_offset(a);
5415 tno = get_treeno(a);
5416 toff = get_tree_offset(a); /* == 0 or 4 */
5417 descr = cl->descrs[tno];
5418 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5419 if (valid_value_is_above_me_32(descr, toff)) {
5420 SVal* tree = &cl->svals[tno << 3];
5421 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5422 } else {
5423 goto slowcase;
5424 }
sewardj8f5374e2008-12-07 11:40:17 +00005425 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005426 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5427 }
5428 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005429 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005430 if (CHECK_ZSM)
5431 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005432 cl->svals[cloff] = svNew;
5433 return;
5434 slowcase: /* misaligned, or must go further down the tree */
5435 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005436 zsm_sapply16__msmcread( thr, a + 0 );
5437 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005438}
5439
sewardj23f12002009-07-24 08:45:08 +00005440static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005441 CacheLine* cl;
5442 UWord cloff, tno, toff;
5443 SVal svOld, svNew;
5444 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005445 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005446 if (UNLIKELY(!aligned32(a))) goto slowcase;
5447 cl = get_cacheline(a);
5448 cloff = get_cacheline_offset(a);
5449 tno = get_treeno(a);
5450 toff = get_tree_offset(a); /* == 0 or 4 */
5451 descr = cl->descrs[tno];
5452 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5453 if (valid_value_is_above_me_32(descr, toff)) {
5454 SVal* tree = &cl->svals[tno << 3];
5455 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5456 } else {
5457 goto slowcase;
5458 }
sewardj8f5374e2008-12-07 11:40:17 +00005459 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005460 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5461 }
5462 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005463 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005464 if (CHECK_ZSM)
5465 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005466 cl->svals[cloff] = svNew;
5467 return;
5468 slowcase: /* misaligned, or must go further down the tree */
5469 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005470 zsm_sapply16__msmcwrite( thr, a + 0 );
5471 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005472}
5473
sewardj23f12002009-07-24 08:45:08 +00005474/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005475
sewardj23f12002009-07-24 08:45:08 +00005476static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005477 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005478 UWord cloff, tno;
5479 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005480 SVal svOld, svNew;
5481 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005482 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005483 if (UNLIKELY(!aligned64(a))) goto slowcase;
5484 cl = get_cacheline(a);
5485 cloff = get_cacheline_offset(a);
5486 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005487 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005488 descr = cl->descrs[tno];
5489 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5490 goto slowcase;
5491 }
5492 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005493 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005494 if (CHECK_ZSM)
5495 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005496 cl->svals[cloff] = svNew;
5497 return;
5498 slowcase: /* misaligned, or must go further down the tree */
5499 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005500 zsm_sapply32__msmcread( thr, a + 0 );
5501 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005502}
5503
sewardj23f12002009-07-24 08:45:08 +00005504static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005505 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005506 UWord cloff, tno;
5507 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005508 SVal svOld, svNew;
5509 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005510 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005511 if (UNLIKELY(!aligned64(a))) goto slowcase;
5512 cl = get_cacheline(a);
5513 cloff = get_cacheline_offset(a);
5514 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005515 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005516 descr = cl->descrs[tno];
5517 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5518 goto slowcase;
5519 }
5520 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005521 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005522 if (CHECK_ZSM)
5523 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005524 cl->svals[cloff] = svNew;
5525 return;
5526 slowcase: /* misaligned, or must go further down the tree */
5527 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005528 zsm_sapply32__msmcwrite( thr, a + 0 );
5529 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005530}
5531
sewardj23f12002009-07-24 08:45:08 +00005532/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005533
5534static
sewardj23f12002009-07-24 08:45:08 +00005535void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005536 CacheLine* cl;
5537 UWord cloff, tno, toff;
5538 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005539 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005540 cl = get_cacheline(a);
5541 cloff = get_cacheline_offset(a);
5542 tno = get_treeno(a);
5543 toff = get_tree_offset(a); /* == 0 .. 7 */
5544 descr = cl->descrs[tno];
5545 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5546 SVal* tree = &cl->svals[tno << 3];
5547 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005548 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005549 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5550 }
5551 tl_assert(svNew != SVal_INVALID);
5552 cl->svals[cloff] = svNew;
5553}
5554
sewardj23f12002009-07-24 08:45:08 +00005555/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005556
5557static
sewardj23f12002009-07-24 08:45:08 +00005558void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005559 CacheLine* cl;
5560 UWord cloff, tno, toff;
5561 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005562 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005563 if (UNLIKELY(!aligned16(a))) goto slowcase;
5564 cl = get_cacheline(a);
5565 cloff = get_cacheline_offset(a);
5566 tno = get_treeno(a);
5567 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5568 descr = cl->descrs[tno];
5569 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5570 if (valid_value_is_below_me_16(descr, toff)) {
5571 /* Writing at this level. Need to fix up 'descr'. */
5572 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5573 /* At this point, the tree does not match cl->descr[tno] any
5574 more. The assignments below will fix it up. */
5575 } else {
5576 /* We can't indiscriminately write on the w16 node as in the
5577 w64 case, as that might make the node inconsistent with
5578 its parent. So first, pull down to this level. */
5579 SVal* tree = &cl->svals[tno << 3];
5580 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005581 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005582 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5583 }
5584 }
5585 tl_assert(svNew != SVal_INVALID);
5586 cl->svals[cloff + 0] = svNew;
5587 cl->svals[cloff + 1] = SVal_INVALID;
5588 return;
5589 slowcase: /* misaligned */
5590 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005591 zsm_swrite08( a + 0, svNew );
5592 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005593}
5594
/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord      toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

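/* Illustrative sketch (not part of the build): how a misaligned
   store decomposes through the slowcases above.  The driver below is
   hypothetical; only the zsm_swriteNN calls are real.  A store of 8
   bytes at a == 0x1003 (not 8-aligned) splits as:
     zsm_swrite64(0x1003) -> zsm_swrite32(0x1003), zsm_swrite32(0x1007)
     zsm_swrite32(0x1003) -> zsm_swrite16(0x1003), zsm_swrite16(0x1005)
     zsm_swrite16(0x1003) -> zsm_swrite08(0x1003), zsm_swrite08(0x1004)
   until every sub-write is naturally aligned; each split bumps the
   matching stats__cline_*splits counter. */
#if 0
static void example_swrite64_misaligned ( SVal sv )
{
   zsm_swrite64( (Addr)0x1003, sv ); /* exercises all three slowcases */
}
#endif
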
/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}


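/* Worked example (illustrative only) of the alignment peeling in
   zsm_sset_range_SMALL: a call with a == 0x5001, len == 13 issues
     zsm_swrite08(0x5001)   1 byte   -> a is now 16-bit aligned
     zsm_swrite16(0x5002)   2 bytes  -> a is now 32-bit aligned
     zsm_swrite32(0x5004)   4 bytes  -> a is now 64-bit aligned
     zsm_swrite32(0x5008)   4 bytes  (remaining len < 8)
     zsm_swrite16(0x500C)   2 bytes
   for a total of 1+2+4+4+2 == 13 bytes, using the widest naturally
   aligned store at every step. */
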
/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word  zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}


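/* Minimal sketch (mirrors the else-arm above; the helper name is made
   up) of why setting a whole out-of-cache line is cheap: a LineZ
   stores one 2-bit dictionary index per byte, so pointing every index
   at dict[0] sets all N_LINE_ARANGE bytes to a single SVal without
   touching the cache. */
#if 0
static void example_set_whole_LineZ ( LineZ* lineZ, SVal svNew )
{
   UWord i;
   lineZ->dict[0] = svNew;
   lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   for (i = 0; i < N_LINE_ARANGE/4; i++)
      lineZ->ix2s[i] = 0;  /* every byte now refers to dict[0] */
   rcinc_LineZ(lineZ);     /* account for the new reference to svNew */
}
#endif
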
/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

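/* Illustrative restatement (hypothetical wrapper; the two callees are
   real) of the front-filter contract used by every function in this
   section: if this thread has already accessed the given range since
   its filter was last cleared, the access can add no new
   happens-before information, so the expensive MSM apply is skipped. */
#if 0
static void example_filtered_write32 ( Thr* thr, Addr a )
{
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a)))
      return;                          /* seen recently: no-op */
   zsm_sapply32__msmcwrite(thr, a);    /* full state machine */
}
#endif
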
void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//   Synchronisation objects                           //
//                                                     //
/////////////////////////////////////////////////////////

/* A doubly-linked list of all the SOs. */
SO* admin_SO = NULL;

static SO* SO__Alloc ( void )
{
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   /* Add to the doubly-linked list */
   if (admin_SO) {
      tl_assert(admin_SO->admin_prev == NULL);
      admin_SO->admin_prev = so;
      so->admin_next = admin_SO;
   } else {
      so->admin_next = NULL;
   }
   so->admin_prev = NULL;
   admin_SO = so;
   /* */
   return so;
}

static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   /* Delete from the doubly-linked list */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (so == admin_SO)
      admin_SO = so->admin_next;
   /* */
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//   Top Level API                                     //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to store a large number of these,
   // so make sure they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);

   /* because the first 1024 are unusable */
   tl_assert(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 3 bits to spare (see defn of
      Thr_n_RCEC). */
   tl_assert(SCALARTS_N_THRBITS <= 29);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   if (sizeof(UWord) == 8) {
      tl_assert(sizeof(Thr_n_RCEC) == 16);
   } else {
      tl_assert(sizeof(Thr_n_RCEC) == 12);
   }

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   tl_assert(sizeof(WordSetID) == 4);
   tl_assert(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace   = get_stacktrace;
   main_get_EC           = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}

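/* Worked example (hypothetical thread ids): if the parent's clocks
   are { P:5 } on entry, then after libhb_create
     child->viR  == child->viW  == { P:5, C:1 }
     parent->viR == parent->viW == { P:6 }
   so the child is ordered after everything the parent did up to the
   fork, while the two threads' subsequent accesses stay unordered
   until some synchronisation object relates them. */
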
/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)"
                  " (%'10lu used)\n",
                  VG_(sizePA) (LineF_pool_allocator),
                  VG_(sizePA) (LineF_pool_allocator) * sizeof(LineF),
                  shmem__SecMap_used_linesF());
      VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
                  " #%lu scanGC \n",
                  stats__secmaps_in_map_shmem,
                  shmem__SecMap_do_GC(False /* really do GC */),
                  stats__secmaps_scanGC);
      tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
      VG_(printf)(" secmaps: %'10lu in freelist,"
                  " total (scanGCed %'lu, ssetGCed %'lu)\n",
                  SecMap_freelist_length(),
                  stats__secmaps_scanGCed,
                  stats__secmaps_ssetGCed);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)("   cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)("   cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)("   cache: %'14lu flushes_invals\n",
                  stats__cache_flushes_invals );
      VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)("   cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu"
                  "    2to1 %'12lu\n",
                  stats__cline_64to32splits, stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu"
                  "    2to1 %'12lu\n",
                  stats__cline_64to32pulldown, stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)("   cline: sizeof(CacheLineZ) %ld,"
                     " covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ),
                     (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)("   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
                  stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)("   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                  stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
      VG_(printf)("   libhb: VTSset: find__or__clone_and_add %'lu"
                  " (%'lu allocd)\n",
                  stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)("   libhb: #%lu vts_tab GC    #%lu vts pruning\n",
                  stats__vts_tab_GC, stats__vts_pruning);
      VG_(printf)( "   libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)("   libhb: threads live: %d exit_and_joinedwith %d"
                     " exit %d joinedwith %d\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)("   libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      {
         UWord OldRef_accs_n[N_OLDREF_ACCS+1];
         UInt accs_n;
         UWord OldRef_n;
         UInt i;

         OldRef_n = 0;
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            OldRef_accs_n[i] = 0;

         for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
            OldRef_n++;
            accs_n = 0;
            for (i = 0; i < N_OLDREF_ACCS; i++) {
               if (o->accs[i].thrid != 0)
                  accs_n++;
            }
            OldRef_accs_n[accs_n]++;
         }

         tl_assert(OldRef_n == oldrefTreeN);
         VG_(printf)( "   libhb: oldrefTreeN %lu ", oldrefTreeN);
         VG_(printf)( "( ");
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
         VG_(printf)( ")\n");
      }
      VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( "   libhb: contextTab: %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
      {
#        define MAXCHAIN 10
         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
         UInt non0chain = 0;
         UInt n;
         UInt i;
         RCEC *p;

         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
         for (i = 0; i < N_RCEC_TAB; i++) {
            n = 0;
            for (p = contextTab[i]; p; p = p->next)
               n++;
            if (n < MAXCHAIN)
               chains[n]++;
            else
               chains[MAXCHAIN]++;
            if (n > 0)
               non0chain++;
         }
         VG_(printf)( "   libhb: contextTab chain of [length]=nchain."
                      " Avg chain len %3.1f\n"
                      "        ",
                      (Double)stats__ctxt_tab_curr
                      / (Double)(non0chain ? non0chain : 1));
         for (i = 0; i <= MAXCHAIN; i++) {
            if (chains[i] != 0)
               VG_(printf)( "[%d%s]=%d ",
                            i, i == MAXCHAIN ? "+" : "",
                            chains[i]);
         }
         VG_(printf)( "\n");
#        undef MAXCHAIN
      }
      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr)    = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

/* Receive notification that a thread has low level exited.  The
   significance here is that we do not expect to see any more memory
   references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen if the
      thread has both ll_exited and has been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Receive notification that a thread has been joined with.  The
   significance here is that we do not expect to see any further
   references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}


/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}

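/* Usage sketch (the driver is hypothetical; the API calls are the
   ones defined above): how a send/recv pair creates a happens-before
   edge.  Everything 'sender' did before the send is ordered before
   everything 'receiver' does after the recv, because the recv join2s
   the SO's clocks into the receiver's. */
#if 0
static void example_message_passing ( Thr* sender, Thr* receiver )
{
   SO* so = libhb_so_alloc();
   libhb_so_send( sender, so, True/*strong_send*/ );
   libhb_so_recv( receiver, so, True/*strong_recv*/ );
   libhb_so_dealloc( so );
}
#endif
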
Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}


/* Set the lines zix_start till zix_end to NOACCESS. */
static void zsm_secmap_line_range_noaccess (SecMap *sm,
                                            UInt zix_start, UInt zix_end)
{
   for (UInt lz = zix_start; lz <= zix_end; lz++) {
      LineZ* lineZ;
      lineZ = &sm->linesZ[lz];
      if (lineZ->dict[0] != SVal_INVALID) {
         rcdec_LineZ(lineZ);
         lineZ->dict[0] = SVal_NOACCESS;
         lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      } else {
         clear_LineF_of_Z(lineZ);
      }
      for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
         lineZ->ix2s[i] = 0; /* all refer to dict[0] */
   }
}

/* Set the given range to SVal_NOACCESS in-place in the secmap.
   a must be cacheline aligned.  len must be a multiple of a cacheline
   and must be < N_SECMAP_ARANGE. */
static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
{
   tl_assert (is_valid_scache_tag (a));
   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
   tl_assert (len < N_SECMAP_ARANGE);

   SecMap *sm1 = shmem__find_SecMap (a);
   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
   UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
   UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;

   if (sm1) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
      zsm_secmap_line_range_noaccess (sm1, zix_start,
                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
   }
   if (sm2 && sm1 != sm2) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
   }
}

/* Set the given address range to SVal_NOACCESS.
   The SecMaps fully set to SVal_NOACCESS will be pushed in SecMap_freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
      BPC = Before, Partial Cacheline, = addr
            (i.e. starting inside a cacheline/inside a SecMap)
      BFC = Before, Full Cacheline(s), but not full SecMap
            (i.e. starting inside a SecMap)
      FSM = Full SecMap(s)
            (i.e. starting a SecMap)
      AFC = After, Full Cacheline(s), but not full SecMap
            (i.e. first address after the full SecMap(s))
      APC = After, Partial Cacheline, i.e. first address after the
            full CacheLines).
      ARE = After Range End = addr+len = first address not part of the range.

      If addr starts a Cacheline, then BPC == BFC.
      If addr starts a SecMap, then BPC == BFC == FSM.
      If addr+len starts a SecMap, then APC == ARE == AFC
      If addr+len starts a Cacheline, then APC == ARE
   */
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;
   SizeT BFClen;
   SizeT FSMlen;
   SizeT AFClen;
   SizeT APClen;

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen. */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
      if (from < to) {                           \
         if (to - from < Plen)                   \
            consumed = to - from;                \
         else                                    \
            consumed = ROUNDDN(Plen, round);     \
      } else {                                   \
         consumed = 0;                           \
      }                                          \
      Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1,               BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE,   BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE,   AFClen);
   PlenCONSUME(APC, ARE, 1,               APClen);

   if (0)
      VG_(printf) ("addr %p[%ld] ARE %p"
                   " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
                   " AFC %p[%ld] APC %p[%ld]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   tl_assert (Plen == 0);

   /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */

   /* First we set the partial cachelines.  This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore.  We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
         free list. */
      Addr sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               LineZ *lineZ = &sm->linesZ[lz];
               if (LIKELY(lineZ->dict[0] != SVal_INVALID))
                  rcdec_LineZ(lineZ);
               else
                  clear_LineF_of_Z(lineZ);
            }
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have kept copies of freed SecMaps in the
         smCache.  => clear them. */
      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
         smCache[0].gaKey = 1;
         smCache[0].sm = NULL;
      }
      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
         smCache[1].gaKey = 1;
         smCache[1].sm = NULL;
      }
      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
         smCache[2].gaKey = 1;
         smCache[2].sm = NULL;
      }
      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
   }
}

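/* Worked example (figures illustrative, assuming N_LINE_ARANGE ==
   0x40 and N_SECMAP_ARANGE == 0x2000): for addr == 0x1F30 and
   len == 0x5000, so ARE == 0x6F30, the split computed above is
     BPC == 0x1F30, BPClen == 0x10    (partial line, via the cache)
     BFC == 0x1F40, BFClen == 0xC0    (full lines, in-place in SecMap)
     FSM == 0x2000, FSMlen == 0x4000  (two whole SecMaps, freed)
     AFC == 0x6000, AFClen == 0xF00   (full lines, in-place in SecMap)
     APC == 0x6F00, APClen == 0x30    (partial line, via the cache)
   and the five pieces sum to len, as asserted by Plen == 0. */
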
void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
}

/* Works byte at a time.  Can be optimised if needed. */
UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
{
   UWord anr = 0; // nr of bytes addressable.

   /* Get the accessibility of each byte.  Take care not to create a
      SecMap or LineZ when checking whether a byte is addressable.

      Note: this is used for client requests.  Performance is deemed
      not critical, so for simplicity we work byte by byte.
      Performance could be improved by working with full cachelines
      or with full SecMaps, when reaching a cacheline or secmap
      boundary. */
   for (SizeT i = 0; i < len; i++) {
      SVal  sv = SVal_INVALID;
      Addr  b = a + i;
      Addr  tag = b & ~(N_LINE_ARANGE - 1);
      UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
      UWord cloff = get_cacheline_offset(b);

      /* Note: we do not use get_cacheline(b) to avoid creating cachelines
         and/or SecMaps for non addressable bytes. */
      if (tag == cache_shmem.tags0[wix]) {
         CacheLine copy = cache_shmem.lyns0[wix];
         /* We work on a copy of the cacheline, as we do not want to
            record the client request as a real read.
            The below is somewhat similar to zsm_sapply08__msmcread but
            avoids side effects on the cache. */
         UWord toff = get_tree_offset(b); /* == 0 .. 7 */
         UWord tno = get_treeno(b);
         UShort descr = copy.descrs[tno];
         if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
            SVal* tree = &copy.svals[tno << 3];
            copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
         }
         sv = copy.svals[cloff];
      } else {
         /* Byte not found in the cacheline.  Search for a SecMap. */
         SecMap *sm = shmem__find_SecMap(b);
         LineZ *lineZ;
         if (sm == NULL)
            sv = SVal_NOACCESS;
         else {
            UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
            lineZ = &sm->linesZ[zix];
            if (lineZ->dict[0] == SVal_INVALID) {
               LineF *lineF = SVal2Ptr(lineZ->dict[1]);
               sv = lineF->w64s[cloff];
            } else {
               UWord ix = read_twobit_array( lineZ->ix2s, cloff );
               sv = lineZ->dict[ix];
            }
         }
      }

      tl_assert (sv != SVal_INVALID);
      if (sv == SVal_NOACCESS) {
         if (abits)
            abits[i] = 0x00;
      } else {
         if (abits)
            abits[i] = 0xff;
         anr++;
      }
   }

   return anr;
}


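/* Usage sketch (hypothetical caller; the buffer size is the caller's
   choice): query the addressability of 16 bytes without disturbing
   the shadow-memory cache. */
#if 0
static void example_query_abits ( Addr a )
{
   UChar abits[16];
   UWord n = libhb_srange_get_abits( a, abits, 16 );
   /* abits[i] is 0xff if a+i is addressable and 0x00 if it is
      SVal_NOACCESS; n counts the addressable bytes. */
}
#endif
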
void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
      (1) we have reached a significant nr of RCECs (to avoid scanning a
          contextTab that is mostly NULL ptrs),
      and (2) we are approaching the max nr of RCECs (as we have in any
          case at least that number of RCECs in the pool allocator),
      and (3) the nr of referenced RCECs is less than 75% of the total
          nr of RCECs.
      Note: the margin in (2) allows us to avoid a small but constant
      increase of the max nr of RCECs due to the fact that libhb_maybe_GC
      is not called when the current nr of RCECs exactly reaches the max.
      Avoiding excessive growth in the nr of RCECs keeps memory use low
      and avoids having too many elements in the (fixed) contextTab
      hashtable. */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
      do_RCEC_GC();

   /* If there are still no entries available (all the table entries are
      full), and we hit the threshold point, then do a GC */
   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
      && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
   if (UNLIKELY (vts_tab_GC))
      vts_tab__do_GC( False/*don't show stats*/ );

   /* scan GC the SecMaps when
      (1) no SecMap is on the freelist
      and (2) the current nr of live secmaps exceeds the threshold. */
   if (UNLIKELY(SecMap_freelist == NULL
                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
      // If we did a vts tab GC, then no need to flush the cache again.
      if (!vts_tab_GC)
         zsm_flush_cache();
      shmem__SecMap_do_GC(True);
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}

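/* Worked example of the RCEC trigger above (numbers illustrative; the
   real N_RCEC_TAB is fixed at compile time): with N_RCEC_TAB == 2^20,
   do_RCEC_GC() runs only once more than 2^19 RCECs exist, the current
   count is within 1000 of the high-water mark stats__ctxt_tab_max,
   and fewer than 75% of the current RCECs are still referenced --
   i.e. only when there is actually a worthwhile amount of garbage. */
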

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END main library                                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/