
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make these checks selectable at run time.
   Hence set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

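/* Illustrative note (an assumption, not stated by the definitions
   above): since SVal_INVALID and SVal_NOACCESS use the top two bits
   as a tag, and a VtsID fits in 30 bits, an ordinary "constrained"
   SVal as built by SVal__mkC (declared below) plausibly carries its
   two VtsIDs in the low 60 bits -- e.g. the read-VtsID in bits 29:0
   and the write-VtsID in bits 59:30.  See SVal__mkC / SVal__unC_Rmin
   / SVal__unC_Wmin later in this file for the authoritative
   encoding. */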


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)

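/* A minimal illustrative sketch (not compiled; the helper name is
   hypothetical): the bitfield above packs a (thread, time) pair into
   one 64-bit word.  Exact bit placement of bitfields is
   implementation-defined, but the 18+46 split fits a single ULong. */
#if 0
static void example_ScalarTS_packing ( void )
{
   ScalarTS ts;
   STATIC_ASSERT(sizeof(ScalarTS) == 8);  /* 18 + 46 bits == 64 */
   ts.thrid = 1024;  /* thrids are issued upwards from 1024 (NB2) */
   ts.tym   = 1;     /* first tick of that thread's scalar clock */
   tl_assert(ts.thrid >= 1024 && ts.thrid <= ThrID_MAX_VALID);
}
#endif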


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )

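/* Worked example (derived from the macros above): with
   FI_LINE_SZB_LOG2 == 5 each filter line covers 32 bytes, so for
   a == 0x123456 we get FI_GET_TAG(a) == 0x123440 and
   FI_GET_LINENO(a) == (0x123456 >> 5) & 0x3FF == 0x1A2. */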

/* Within each line, every naturally-aligned 8-byte chunk is treated
   individually and is mapped to a UShort.  Regardless of endianness
   of the underlying machine, bits 1 and 0 pertain to the lowest
   address and bits 15 and 14 to the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
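
/* A minimal sketch (not compiled; 'example_filter_seen_R' is a
   hypothetical helper, not part of the real interface) of how the
   bit pairs described above would be consulted for a 1-byte read at
   'a'.  A real lookup would first check that tags[lineno] matches
   FI_GET_TAG(a) before trusting the line's contents. */
#if 0
static Bool example_filter_seen_R ( Filter* fi, Addr a )
{
   UWord   lineno = FI_GET_LINENO(a);
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - FI_GET_TAG(a)) / 8; /* which UShort in the line */
   UWord   byteno = a & 7;                   /* which byte in that chunk */
   return (line->u16s[loff] >> (2*byteno + 1)) & 1; /* R bit of the pair */
}
#endif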



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwdses */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );
static inline void  SVal__rcinc    ( SVal s );
static inline void  SVal__rcdec    ( SVal s );

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   SVal__rcinc and SVal__rcdec in response to all the calls below, in
   order to allow the user to do reference counting on the SVals stored
   herein.  It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   either above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to SVal__rcinc and SVal__rcdec, will be correct, and so any items
   with a zero reference count may be freed (or at least considered to
   be unreferenced by this library).
*/
static void zsm_init ( void );

static void zsm_sset_range       ( Addr, SizeT, SVal );
static void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew );
static void zsm_scopy_range      ( Addr, Addr, SizeT );
static void zsm_flush_cache      ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
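
/* Example: ROUNDUP(13,8) == 16 and ROUNDDN(13,8) == 8; both leave
   already-aligned values unchanged. */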

/* True if a belongs in range [start, start + szB[
   (i.e. start + szB is excluded). */
static inline Bool address_in_range (Addr a, Addr start, SizeT szB)
{
   /* Checking start <= a && a < start + szB.
      As start and a are unsigned addresses, the condition can
      be simplified. */
   if (CHECK_ZSM)
      tl_assert ((a - start < szB)
                 == (start <= a
                     && a < start + szB));
   return a - start < szB;
}
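
/* Example of the wraparound trick: with start == 0x1000 and
   szB == 0x100, an address a == 0xFFF gives a - start == (Addr)-1,
   a huge unsigned value, so 'a - start < szB' correctly rejects it
   with a single comparison. */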

/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0  (1<<0)
#define TREE_DESCR_32_0  (1<<1)
#define TREE_DESCR_16_1  (1<<2)
#define TREE_DESCR_64    (1<<3)
#define TREE_DESCR_16_2  (1<<4)
#define TREE_DESCR_32_1  (1<<5)
#define TREE_DESCR_16_3  (1<<6)
#define TREE_DESCR_8_0   (1<<7)
#define TREE_DESCR_8_1   (1<<8)
#define TREE_DESCR_8_2   (1<<9)
#define TREE_DESCR_8_3   (1<<10)
#define TREE_DESCR_8_4   (1<<11)
#define TREE_DESCR_8_5   (1<<12)
#define TREE_DESCR_8_6   (1<<13)
#define TREE_DESCR_8_7   (1<<14)
#define TREE_DESCR_DTY   (1<<15)

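/* Example descriptor values: a tree still split into eight 1-byte
   leaves has descr == TREE_DESCR_8_7 | ... | TREE_DESCR_8_0
   == 0x7F80, while a tree holding one 64-bit value has
   descr == TREE_DESCR_64 == 0x0008. */
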
typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
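
/* Size comparison, ignoring alignment padding: a LineZ is 4*8 bytes
   of dict plus N_LINE_ARANGE/4 == 16 bytes of 2-bit indexes, i.e.
   48 bytes for a 64-byte address range, whereas a LineF needs
   N_LINE_ARANGE * 8 == 512 bytes of SVals.  Hence the preference for
   the compressed representation whenever a line holds at most 4
   distinct values. */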

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

// (UInt) `echo "Free SecMap" | md5sum`
#define SecMap_free_MAGIC 0x5a977f30U

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmaps_in_map_shmem = 0; // # SecMaps 'live'
static UWord stats__secmaps_scanGC       = 0; // # nr of scan GC done.
static UWord stats__secmaps_scanGCed     = 0; // # SecMaps GC-ed via scan
static UWord stats__secmaps_ssetGCed     = 0; // # SecMaps GC-ed via setnoaccess
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_flushes_invals = 0; // # cache flushes and invals
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises    = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s      = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s      = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s      = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s      = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s     = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s     = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s     = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s     = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s      = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s     = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s     = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s     = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s     = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s      = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits  = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits  = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits   = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
static UWord stats__vts_tab_GC           = 0; // # nr of vts_tab GC

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

/* SecMaps that have become entirely SVal_NOACCESS are inserted into a
   list of recycled SecMaps.  When a new SecMap is needed, a recycled
   one will be used in preference to allocating a fresh one. */
/* We make a linked list of SecMaps.  The linesF pointer is re-used to
   implement the linked list. */
static SecMap *SecMap_freelist = NULL;
static UWord SecMap_freelist_length(void)
{
   SecMap *sm;
   UWord  n = 0;

   sm = SecMap_freelist;
   while (sm) {
      n++;
      sm = (SecMap*)sm->linesF;
   }
   return n;
}

static void push_SecMap_on_freelist(SecMap* sm)
{
   if (0) VG_(message)(Vg_DebugMsg, "%p push\n", sm);
   sm->magic = SecMap_free_MAGIC;
   sm->linesF = (LineF*)SecMap_freelist;
   SecMap_freelist = sm;
}
/* Returns a free SecMap if there is one.
   Otherwise, returns NULL. */
static SecMap *pop_SecMap_from_freelist(void)
{
   SecMap *sm;

   sm = SecMap_freelist;
   if (sm) {
      tl_assert (sm->magic == SecMap_free_MAGIC);
      SecMap_freelist = (SecMap*)sm->linesF;
      if (0) VG_(message)(Vg_DebugMsg, "%p pop\n", sm);
   }
   return sm;
}

static SecMap* shmem__alloc_or_recycle_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = pop_SecMap_from_freelist();

   if (!sm) {
      sm = shmem__bigchunk_alloc( sizeof(SecMap) );
      stats__secmaps_allocd++;
      stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
      stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
      stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   }
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

/* Scan map_shmem and count the SecMaps that could be GC-ed.
   If 'really' is True, actually GC them. */
/* NOT TO BE CALLED FROM WITHIN libzsm. */
static UWord next_SecMap_GC_at = 1000;
__attribute__((noinline))
static UWord shmem__SecMap_do_GC(Bool really)
{
   UWord secmapW = 0;
   Addr  gaKey;
   UWord examined = 0;
   UWord ok_GCed  = 0;

   /* First invalidate the smCache */
   smCache[0].gaKey = 1;
   smCache[1].gaKey = 1;
   smCache[2].gaKey = 1;
   STATIC_ASSERT (3 == sizeof(smCache)/sizeof(smCache[0]));

   VG_(initIterFM)( map_shmem );
   while (VG_(nextIterFM)( map_shmem, &gaKey, &secmapW )) {
      UWord   i;
      UWord   j;
      SecMap* sm = (SecMap*)secmapW;
      tl_assert(sm->magic == SecMap_MAGIC);
      Bool ok_to_GC = True;

      examined++;

      /* Deal with the LineZs */
      for (i = 0; i < N_SECMAP_ZLINES && ok_to_GC; i++) {
         LineZ* lineZ = &sm->linesZ[i];
         ok_to_GC = lineZ->dict[0] == SVal_INVALID
            || (lineZ->dict[0] == SVal_NOACCESS
                && !SVal__isC (lineZ->dict[1])
                && !SVal__isC (lineZ->dict[2])
                && !SVal__isC (lineZ->dict[3]));
      }
      /* Deal with the LineFs */
      for (i = 0; i < sm->linesF_size && ok_to_GC; i++) {
         LineF* lineF = &sm->linesF[i];
         if (!lineF->inUse)
            continue;
         for (j = 0; j < N_LINE_ARANGE && ok_to_GC; j++)
            ok_to_GC = lineF->w64s[j] == SVal_NOACCESS;
      }
      if (ok_to_GC)
         ok_GCed++;
      if (ok_to_GC && really) {
         SecMap *fm_sm;
         Addr    fm_gaKey;
         /* We cannot remove a SecMap from map_shmem while iterating.
            So, stop iteration, remove from map_shmem, recreate the
            iteration on the next SecMap. */
         VG_(doneIterFM) ( map_shmem );
         /* No need to rcdec linesZ or linesF, these are all
            SVal_NOACCESS or not in use.  We just need to free the
            linesF. */
         if (sm->linesF_size > 0) {
            HG_(free)(sm->linesF);
            stats__secmap_linesF_allocd -= sm->linesF_size;
            stats__secmap_linesF_bytes  -= sm->linesF_size * sizeof(LineF);
         }
         if (!VG_(delFromFM)(map_shmem, &fm_gaKey, (UWord*)&fm_sm, gaKey))
            tl_assert (0);
         stats__secmaps_in_map_shmem--;
         tl_assert (gaKey == fm_gaKey);
         tl_assert (sm == fm_sm);
         stats__secmaps_scanGCed++;
         push_SecMap_on_freelist (sm);
         VG_(initIterAtFM) (map_shmem, gaKey + N_SECMAP_ARANGE);
      }
   }
   VG_(doneIterFM)( map_shmem );

   if (really) {
      stats__secmaps_scanGC++;
      /* Next GC when we approach the max allocated */
      next_SecMap_GC_at = stats__secmaps_allocd - 1000;
      /* Unless we GCed less than 10%, in which case we allow 10% more
         allocation before the next GC.  This avoids doing a lot of
         costly GCs for the worst case: the 'growing phase' of an
         application that allocates a lot of memory.
         The worst case can be reproduced e.g. by
            perf/memrw -t 30000000 -b 1000 -r 1 -l 1
         which allocates around 30Gb of memory. */
      if (ok_GCed < stats__secmaps_allocd/10)
         next_SecMap_GC_at = stats__secmaps_allocd + stats__secmaps_allocd/10;

   }

   if (VG_(clo_stats) && really) {
      VG_(message)(Vg_DebugMsg,
                   "libhb: SecMap GC: #%lu scanned %lu, GCed %lu,"
                   " next GC at %lu\n",
                   stats__secmaps_scanGC, examined, ok_GCed,
                   next_SecMap_GC_at);
   }

   return ok_GCed;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_or_recycle_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      stats__secmaps_in_map_shmem++;
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      SVal__rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   SVal__rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) SVal__rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) SVal__rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) SVal__rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}

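/* A minimal illustrative sketch (not compiled; the helper name is
   hypothetical): round-tripping a dict index through a 2-bit array
   of the kind used for LineZ.ix2s. */
#if 0
static void example_twobit_roundtrip ( void )
{
   UChar arr[N_LINE_ARANGE/4];
   VG_(memset)(arr, 0, sizeof(arr));
   write_twobit_array( arr, 5, 3 );  /* byte 5 of the line -> dict[3] */
   tl_assert( read_twobit_array(arr, 5) == 3 );
   tl_assert( read_twobit_array(arr, 4) == 0 ); /* neighbour untouched */
}
#endif
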
/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation, in
   recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32       8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32       8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32       16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32       16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32       32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (CHECK_ZSM
       && UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                   || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                   || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                   || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
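
/* Example: a tree whose eight leaves all hold the same SVal v is
   collapsed bottom-up by the function above: the 16-bit layer merges
   pairs, the 32-bit layer merges those, and the 64-bit layer leaves
   descr == TREE_DESCR_64 with tree[0] == v and tree[1..7] ==
   SVal_INVALID. */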
1358
1359/* This takes a cacheline where all the data is at the leaves
1360 (w8[..]) and builds a correctly normalised tree. */
1361static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1362{
1363 Word tno, cloff;
1364 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1365 SVal* tree = &cl->svals[cloff];
1366 cl->descrs[tno] = normalise_tree( tree );
1367 }
1368 tl_assert(cloff == N_LINE_ARANGE);
sewardj8f5374e2008-12-07 11:40:17 +00001369 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001370 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1371 stats__cline_normalises++;
1372}
1373
1374
1375typedef struct { UChar count; SVal sval; } CountedSVal;
1376
1377static
1378void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1379 /*OUT*/Word* dstUsedP,
1380 Word nDst, CacheLine* src )
1381{
1382 Word tno, cloff, dstUsed;
1383
1384 tl_assert(nDst == N_LINE_ARANGE);
1385 dstUsed = 0;
1386
1387 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
1388 UShort descr = src->descrs[tno];
1389 SVal* tree = &src->svals[cloff];
1390
1391 /* sequentialise the tree described by (descr,tree). */
1392# define PUT(_n,_v) \
1393 do { dst[dstUsed ].count = (_n); \
1394 dst[dstUsed++].sval = (_v); \
1395 } while (0)
1396
1397 /* byte 0 */
1398 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
1399 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1400 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1401 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
1402 /* byte 1 */
1403 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
1404 /* byte 2 */
1405 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1406 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
1407 /* byte 3 */
1408 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
1409 /* byte 4 */
1410 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1411 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1412 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
1413 /* byte 5 */
1414 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
1415 /* byte 6 */
1416 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1417 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
1418 /* byte 7 */
1419 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
1420
1421# undef PUT
1422 /* END sequentialise the tree described by (descr,tree). */
1423
1424 }
1425 tl_assert(cloff == N_LINE_ARANGE);
1426 tl_assert(dstUsed <= nDst);
1427
1428 *dstUsedP = dstUsed;
1429}
1430
1431/* Write the cacheline 'wix' to backing store. Where it ends up
1432 is determined by its tag field. */
1433static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1434{
1435 Word i, j, k, m;
1436 Addr tag;
1437 SecMap* sm;
1438 CacheLine* cl;
1439 LineZ* lineZ;
1440 LineF* lineF;
1441 Word zix, fix, csvalsUsed;
1442 CountedSVal csvals[N_LINE_ARANGE];
1443 SVal sv;
1444
1445 if (0)
1446 VG_(printf)("scache wback line %d\n", (Int)wix);
1447
1448 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1449
1450 tag = cache_shmem.tags0[wix];
1451 cl = &cache_shmem.lyns0[wix];
1452
1453 /* The cache line may have been invalidated; if so, ignore it. */
1454 if (!is_valid_scache_tag(tag))
1455 return;
1456
1457 /* Where are we going to put it? */
1458 sm = NULL;
1459 lineZ = NULL;
1460 lineF = NULL;
1461 zix = fix = -1;
1462
1463 /* find the Z line to write in and rcdec it or the associated F
1464 line. */
1465 find_Z_for_writing( &sm, &zix, tag );
1466
1467 tl_assert(sm);
1468 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1469 lineZ = &sm->linesZ[zix];
1470
1471 /* Generate the data to be stored */
sewardj8f5374e2008-12-07 11:40:17 +00001472 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001473 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1474
1475 csvalsUsed = -1;
1476 sequentialise_CacheLine( csvals, &csvalsUsed,
1477 N_LINE_ARANGE, cl );
1478 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1479 if (0) VG_(printf)("%lu ", csvalsUsed);
1480
1481 lineZ->dict[0] = lineZ->dict[1]
1482 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1483
1484 /* i indexes actual shadow values, k is cursor in csvals */
1485 i = 0;
1486 for (k = 0; k < csvalsUsed; k++) {
1487
1488 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001489 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001490 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1491 /* do we already have it? */
1492 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1493 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1494 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1495 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1496 /* no. look for a free slot. */
sewardj8f5374e2008-12-07 11:40:17 +00001497 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001498 tl_assert(sv != SVal_INVALID);
1499 if (lineZ->dict[0]
1500 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1501 if (lineZ->dict[1]
1502 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1503 if (lineZ->dict[2]
1504 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1505 if (lineZ->dict[3]
1506 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1507 break; /* we'll have to use the f rep */
1508 dict_ok:
1509 m = csvals[k].count;
1510 if (m == 8) {
1511 write_twobit_array( lineZ->ix2s, i+0, j );
1512 write_twobit_array( lineZ->ix2s, i+1, j );
1513 write_twobit_array( lineZ->ix2s, i+2, j );
1514 write_twobit_array( lineZ->ix2s, i+3, j );
1515 write_twobit_array( lineZ->ix2s, i+4, j );
1516 write_twobit_array( lineZ->ix2s, i+5, j );
1517 write_twobit_array( lineZ->ix2s, i+6, j );
1518 write_twobit_array( lineZ->ix2s, i+7, j );
1519 i += 8;
1520 }
1521 else if (m == 4) {
1522 write_twobit_array( lineZ->ix2s, i+0, j );
1523 write_twobit_array( lineZ->ix2s, i+1, j );
1524 write_twobit_array( lineZ->ix2s, i+2, j );
1525 write_twobit_array( lineZ->ix2s, i+3, j );
1526 i += 4;
1527 }
1528 else if (m == 1) {
1529 write_twobit_array( lineZ->ix2s, i+0, j );
1530 i += 1;
1531 }
1532 else if (m == 2) {
1533 write_twobit_array( lineZ->ix2s, i+0, j );
1534 write_twobit_array( lineZ->ix2s, i+1, j );
1535 i += 2;
1536 }
1537 else {
1538 tl_assert(0); /* 8, 4, 2 or 1 are the only legitimate values for m */
1539 }
1540
1541 }
1542
1543 if (LIKELY(i == N_LINE_ARANGE)) {
1544 /* Construction of the compressed representation was
1545 successful. */
1546 rcinc_LineZ(lineZ);
1547 stats__cache_Z_wbacks++;
1548 } else {
1549 /* Cannot use the compressed(z) representation. Use the full(f)
1550 rep instead. */
1551 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1552 alloc_F_for_writing( sm, &fix );
1553 tl_assert(sm->linesF);
1554 tl_assert(sm->linesF_size > 0);
1555 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1556 lineF = &sm->linesF[fix];
1557 tl_assert(!lineF->inUse);
1558 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1559 lineZ->dict[1] = (SVal)fix;
1560 lineF->inUse = True;
1561 i = 0;
1562 for (k = 0; k < csvalsUsed; k++) {
sewardj8f5374e2008-12-07 11:40:17 +00001563 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001564 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1565 sv = csvals[k].sval;
sewardj8f5374e2008-12-07 11:40:17 +00001566 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001567 tl_assert(sv != SVal_INVALID);
1568 for (m = csvals[k].count; m > 0; m--) {
1569 lineF->w64s[i] = sv;
1570 i++;
1571 }
1572 }
1573 tl_assert(i == N_LINE_ARANGE);
1574 rcinc_LineF(lineF);
1575 stats__cache_F_wbacks++;
1576 }
sewardjf98e1c02008-10-25 16:22:41 +00001577}
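/* A worked example of the compressed (Z) encoding built above: if the
   64 shadow values of a line contain only two distinct SVals, A and B,
   then dict[] ends up as { A, B, INVALID, INVALID } and each of the 64
   two-bit entries in ix2s selects index 0 or 1 -- the whole line costs
   4 SVals plus 16 bytes of indices.  If a fifth distinct value turns
   up, the loop above breaks out and the line is stored uncompressed in
   an F line; the marker for that state, as set above, is
   dict[0] == dict[2] == dict[3] == SVal_INVALID with dict[1] holding
   the F-line index. */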
1578
1579/* Fetch the cacheline 'wix' from the backing store. The tag
1580 associated with 'wix' is assumed to have already been filled in;
1581 hence that is used to determine where in the backing store to read
1582 from. */
1583static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1584{
1585 Word i;
1586 Addr tag;
1587 CacheLine* cl;
1588 LineZ* lineZ;
1589 LineF* lineF;
1590
1591 if (0)
1592 VG_(printf)("scache fetch line %d\n", (Int)wix);
1593
1594 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1595
1596 tag = cache_shmem.tags0[wix];
1597 cl = &cache_shmem.lyns0[wix];
1598
1599 /* reject nonsense requests */
1600 tl_assert(is_valid_scache_tag(tag));
1601
1602 lineZ = NULL;
1603 lineF = NULL;
1604 find_ZF_for_reading( &lineZ, &lineF, tag );
1605 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1606
1607 /* expand the data into the bottom layer of the tree, then get
1608 normalise_CacheLine to build the descriptor array. */
1609 if (lineF) {
1610 tl_assert(lineF->inUse);
1611 for (i = 0; i < N_LINE_ARANGE; i++) {
1612 cl->svals[i] = lineF->w64s[i];
1613 }
1614 stats__cache_F_fetches++;
1615 } else {
1616 for (i = 0; i < N_LINE_ARANGE; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00001617 UWord ix = read_twobit_array( lineZ->ix2s, i );
philippe1475a7f2015-05-11 19:45:08 +00001618 if (CHECK_ZSM) tl_assert(ix >= 0 && ix <= 3);
1619 cl->svals[i] = lineZ->dict[ix];
1620 if (CHECK_ZSM) tl_assert(cl->svals[i] != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00001621 }
1622 stats__cache_Z_fetches++;
1623 }
1624 normalise_CacheLine( cl );
1625}
1626
philippef54cb662015-05-10 22:19:31 +00001627/* Invalidate the cachelines corresponding to the given range. */
1628static void shmem__invalidate_scache_range (Addr ga, SizeT szB)
1629{
1630 Addr before_start = ga;
1631 Addr aligned_start = ROUNDUP(ga, N_LINE_ARANGE);
1632 Addr after_start = ROUNDDN(ga + szB, N_LINE_ARANGE);
1633 UWord before_len = aligned_start - before_start;
1634 UWord after_len = ga + szB - after_start;
1635
1636 /* Set the in-range parts of the partial cachelines at each end to NOACCESS */
1637 if (before_len > 0) {
1638 zsm_sset_range_SMALL (before_start, before_len, SVal_NOACCESS);
1639 szB += N_LINE_ARANGE - before_len;
sewardjf98e1c02008-10-25 16:22:41 +00001640 }
philippef54cb662015-05-10 22:19:31 +00001641 if (after_len > 0) {
1642 zsm_sset_range_SMALL (after_start, after_len, SVal_NOACCESS);
1643 szB += N_LINE_ARANGE - after_len;
1644 }
1645
1646 /* szB must now be a multiple of cacheline size. */
1647 tl_assert (0 == (szB & (N_LINE_ARANGE - 1)));
1648
1649 Word wix;
1650
1651 Word ga_ix = (ga >> N_LINE_BITS) & (N_WAY_NENT - 1);
1652 Word nwix = szB / N_LINE_ARANGE;
1653
1654 if (nwix > N_WAY_NENT)
1655 nwix = N_WAY_NENT; // no need to check the same entry more than once.
1656
1657 for (wix = 0; wix < nwix; wix++) {
1658 if (address_in_range(cache_shmem.tags0[ga_ix], ga, szB))
1659 cache_shmem.tags0[ga_ix] = 1/*INVALID*/;
1660 ga_ix++;
1661 if (ga_ix == N_WAY_NENT)
1662 ga_ix = 0;
1663 }
sewardjf98e1c02008-10-25 16:22:41 +00001664}
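/* Worked example of the boundary arithmetic above, assuming
   N_LINE_ARANGE == 64: for ga == 0x1010 and szB == 200,
   aligned_start == 0x1040 so before_len == 48, and
   after_start == 0x10C0 so after_len == 24.  After both adjustments
   szB == 256, a whole number (4) of cachelines, and the loop visits 4
   consecutive cache sets starting at ga's set index. */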
1665
philippef54cb662015-05-10 22:19:31 +00001666
sewardjf98e1c02008-10-25 16:22:41 +00001667static void shmem__flush_and_invalidate_scache ( void ) {
1668 Word wix;
1669 Addr tag;
1670 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1671 tl_assert(!is_valid_scache_tag(1));
1672 for (wix = 0; wix < N_WAY_NENT; wix++) {
1673 tag = cache_shmem.tags0[wix];
1674 if (tag == 1/*INVALID*/) {
1675 /* already invalid; nothing to do */
1676 } else {
1677 tl_assert(is_valid_scache_tag(tag));
1678 cacheline_wback( wix );
1679 }
1680 cache_shmem.tags0[wix] = 1/*INVALID*/;
1681 }
philippef54cb662015-05-10 22:19:31 +00001682 stats__cache_flushes_invals++;
sewardjf98e1c02008-10-25 16:22:41 +00001683}
1684
1685
1686static inline Bool aligned16 ( Addr a ) {
1687 return 0 == (a & 1);
1688}
1689static inline Bool aligned32 ( Addr a ) {
1690 return 0 == (a & 3);
1691}
1692static inline Bool aligned64 ( Addr a ) {
1693 return 0 == (a & 7);
1694}
1695static inline UWord get_cacheline_offset ( Addr a ) {
1696 return (UWord)(a & (N_LINE_ARANGE - 1));
1697}
1698static inline Addr cacheline_ROUNDUP ( Addr a ) {
1699 return ROUNDUP(a, N_LINE_ARANGE);
1700}
1701static inline Addr cacheline_ROUNDDN ( Addr a ) {
1702 return ROUNDDN(a, N_LINE_ARANGE);
1703}
1704static inline UWord get_treeno ( Addr a ) {
1705 return get_cacheline_offset(a) >> 3;
1706}
1707static inline UWord get_tree_offset ( Addr a ) {
1708 return a & 7;
1709}
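/* Illustrative sketch, not part of the build: how the helpers above
   decompose an address, assuming N_LINE_BITS == 6 and hence
   N_LINE_ARANGE == 64 (the configuration this file normally uses).
   The concrete values are for illustration only. */
#if 0
static void example__address_decomposition ( void )
{
   Addr  a       = 0x1234567B;
   UWord lineoff = a & (N_LINE_ARANGE - 1); /* get_cacheline_offset: 59 */
   UWord treeno  = lineoff >> 3;            /* get_treeno: tree 7 of 8  */
   UWord toff    = a & 7;                   /* get_tree_offset: 3       */
   /* So 'a' is byte 3 of the last (8th) 8-byte tree in its line. */
   tl_assert(lineoff == 59 && treeno == 7 && toff == 3);
}
#endif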
1710
1711static __attribute__((noinline))
1712 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1713static inline CacheLine* get_cacheline ( Addr a )
1714{
1715 /* tag is 'a' with the in-line offset masked out,
1716 eg, with 64-byte lines, a[31]..a[6] then six zero bits */
1717 Addr tag = a & ~(N_LINE_ARANGE - 1);
1718 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1719 stats__cache_totrefs++;
1720 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1721 return &cache_shmem.lyns0[wix];
1722 } else {
1723 return get_cacheline_MISS( a );
1724 }
1725}
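/* Worked example for the fast path above, assuming N_LINE_BITS == 6
   and N_WAY_NENT == 2^16 (a direct-mapped cache of 65536 lines): for
   a == 0x1234567B, tag == 0x12345640 and
   wix == (a >> 6) & 0xFFFF == 0xD159.  A hit costs a single tag
   comparison; a miss falls into get_cacheline_MISS below, which
   writes back the evicted line and fetches the wanted one. */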
1726
1727static __attribute__((noinline))
1728 CacheLine* get_cacheline_MISS ( Addr a )
1729{
1730 /* tag is 'a' with the in-line offset masked out,
1731 eg, with 64-byte lines, a[31]..a[6] then six zero bits */
1732
1733 CacheLine* cl;
1734 Addr* tag_old_p;
1735 Addr tag = a & ~(N_LINE_ARANGE - 1);
1736 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1737
1738 tl_assert(tag != cache_shmem.tags0[wix]);
1739
1740 /* Dump the old line into the backing store. */
1741 stats__cache_totmisses++;
1742
1743 cl = &cache_shmem.lyns0[wix];
1744 tag_old_p = &cache_shmem.tags0[wix];
1745
1746 if (is_valid_scache_tag( *tag_old_p )) {
1747 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001748 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001749 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1750 cacheline_wback( wix );
1751 }
1752 /* and reload the new one */
1753 *tag_old_p = tag;
1754 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001755 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001756 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1757 return cl;
1758}
1759
1760static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1761 stats__cline_64to32pulldown++;
1762 switch (toff) {
1763 case 0: case 4:
1764 tl_assert(descr & TREE_DESCR_64);
1765 tree[4] = tree[0];
1766 descr &= ~TREE_DESCR_64;
1767 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1768 break;
1769 default:
1770 tl_assert(0);
1771 }
1772 return descr;
1773}
1774
1775static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1776 stats__cline_32to16pulldown++;
1777 switch (toff) {
1778 case 0: case 2:
1779 if (!(descr & TREE_DESCR_32_0)) {
1780 descr = pulldown_to_32(tree, 0, descr);
1781 }
1782 tl_assert(descr & TREE_DESCR_32_0);
1783 tree[2] = tree[0];
1784 descr &= ~TREE_DESCR_32_0;
1785 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1786 break;
1787 case 4: case 6:
1788 if (!(descr & TREE_DESCR_32_1)) {
1789 descr = pulldown_to_32(tree, 4, descr);
1790 }
1791 tl_assert(descr & TREE_DESCR_32_1);
1792 tree[6] = tree[4];
1793 descr &= ~TREE_DESCR_32_1;
1794 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1795 break;
1796 default:
1797 tl_assert(0);
1798 }
1799 return descr;
1800}
1801
1802static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1803 stats__cline_16to8pulldown++;
1804 switch (toff) {
1805 case 0: case 1:
1806 if (!(descr & TREE_DESCR_16_0)) {
1807 descr = pulldown_to_16(tree, 0, descr);
1808 }
1809 tl_assert(descr & TREE_DESCR_16_0);
1810 tree[1] = tree[0];
1811 descr &= ~TREE_DESCR_16_0;
1812 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1813 break;
1814 case 2: case 3:
1815 if (!(descr & TREE_DESCR_16_1)) {
1816 descr = pulldown_to_16(tree, 2, descr);
1817 }
1818 tl_assert(descr & TREE_DESCR_16_1);
1819 tree[3] = tree[2];
1820 descr &= ~TREE_DESCR_16_1;
1821 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1822 break;
1823 case 4: case 5:
1824 if (!(descr & TREE_DESCR_16_2)) {
1825 descr = pulldown_to_16(tree, 4, descr);
1826 }
1827 tl_assert(descr & TREE_DESCR_16_2);
1828 tree[5] = tree[4];
1829 descr &= ~TREE_DESCR_16_2;
1830 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1831 break;
1832 case 6: case 7:
1833 if (!(descr & TREE_DESCR_16_3)) {
1834 descr = pulldown_to_16(tree, 6, descr);
1835 }
1836 tl_assert(descr & TREE_DESCR_16_3);
1837 tree[7] = tree[6];
1838 descr &= ~TREE_DESCR_16_3;
1839 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1840 break;
1841 default:
1842 tl_assert(0);
1843 }
1844 return descr;
1845}
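/* Worked example of the pulldown chain above: suppose a tree's
   descriptor is just TREE_DESCR_64 (one valid 8-byte value in tree[0])
   and byte-level access is needed at toff == 3.  pulldown_to_8(tree,
   3, descr) finds TREE_DESCR_16_1 missing, so calls
   pulldown_to_16(tree, 2, ..), which finds TREE_DESCR_32_0 missing and
   calls pulldown_to_32(tree, 0, ..).  Unwinding: the 64-bit node
   splits into {32_1, 32_0}, then 32_0 into {16_1, 16_0}, then 16_1
   into {8_3, 8_2}, copying the value down (tree[4] = tree[0],
   tree[2] = tree[0], tree[3] = tree[2]) on the way.  The final
   descriptor is {32_1, 16_0, 8_3, 8_2}. */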
1846
1847
1848static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1849 UShort mask;
1850 switch (toff) {
1851 case 0:
1852 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1853 tl_assert( (descr & mask) == mask );
1854 descr &= ~mask;
1855 descr |= TREE_DESCR_16_0;
1856 break;
1857 case 2:
1858 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1859 tl_assert( (descr & mask) == mask );
1860 descr &= ~mask;
1861 descr |= TREE_DESCR_16_1;
1862 break;
1863 case 4:
1864 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1865 tl_assert( (descr & mask) == mask );
1866 descr &= ~mask;
1867 descr |= TREE_DESCR_16_2;
1868 break;
1869 case 6:
1870 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1871 tl_assert( (descr & mask) == mask );
1872 descr &= ~mask;
1873 descr |= TREE_DESCR_16_3;
1874 break;
1875 default:
1876 tl_assert(0);
1877 }
1878 return descr;
1879}
1880
1881static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1882 UShort mask;
1883 switch (toff) {
1884 case 0:
1885 if (!(descr & TREE_DESCR_16_0))
1886 descr = pullup_descr_to_16(descr, 0);
1887 if (!(descr & TREE_DESCR_16_1))
1888 descr = pullup_descr_to_16(descr, 2);
1889 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1890 tl_assert( (descr & mask) == mask );
1891 descr &= ~mask;
1892 descr |= TREE_DESCR_32_0;
1893 break;
1894 case 4:
1895 if (!(descr & TREE_DESCR_16_2))
1896 descr = pullup_descr_to_16(descr, 4);
1897 if (!(descr & TREE_DESCR_16_3))
1898 descr = pullup_descr_to_16(descr, 6);
1899 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1900 tl_assert( (descr & mask) == mask );
1901 descr &= ~mask;
1902 descr |= TREE_DESCR_32_1;
1903 break;
1904 default:
1905 tl_assert(0);
1906 }
1907 return descr;
1908}
1909
1910static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1911 switch (toff) {
1912 case 0: case 4:
1913 return 0 != (descr & TREE_DESCR_64);
1914 default:
1915 tl_assert(0);
1916 }
1917}
1918
1919static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1920 switch (toff) {
1921 case 0:
1922 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1923 case 2:
1924 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1925 case 4:
1926 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1927 case 6:
1928 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1929 default:
1930 tl_assert(0);
1931 }
1932}
1933
1934/* ------------ Cache management ------------ */
1935
1936static void zsm_flush_cache ( void )
1937{
1938 shmem__flush_and_invalidate_scache();
1939}
1940
1941
philippe1475a7f2015-05-11 19:45:08 +00001942static void zsm_init ( void )
sewardjf98e1c02008-10-25 16:22:41 +00001943{
1944 tl_assert( sizeof(UWord) == sizeof(Addr) );
1945
sewardjf98e1c02008-10-25 16:22:41 +00001946 tl_assert(map_shmem == NULL);
1947 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1948 HG_(free),
1949 NULL/*unboxed UWord cmp*/);
philippef54cb662015-05-10 22:19:31 +00001950 /* Invalidate all cache entries. */
1951 tl_assert(!is_valid_scache_tag(1));
1952 for (UWord wix = 0; wix < N_WAY_NENT; wix++) {
1953 cache_shmem.tags0[wix] = 1/*INVALID*/;
1954 }
sewardjf98e1c02008-10-25 16:22:41 +00001955
1956 /* a SecMap must contain an integral number of CacheLines */
1957 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1958 /* also ... a CacheLine holds an integral number of trees */
1959 tl_assert(0 == (N_LINE_ARANGE % 8));
1960}
1961
1962/////////////////////////////////////////////////////////////////
1963/////////////////////////////////////////////////////////////////
1964// //
1965// SECTION END compressed shadow memory //
1966// //
1967/////////////////////////////////////////////////////////////////
1968/////////////////////////////////////////////////////////////////
1969
1970
1971
1972/////////////////////////////////////////////////////////////////
1973/////////////////////////////////////////////////////////////////
1974// //
1975// SECTION BEGIN vts primitives //
1976// //
1977/////////////////////////////////////////////////////////////////
1978/////////////////////////////////////////////////////////////////
1979
sewardjf98e1c02008-10-25 16:22:41 +00001980
sewardje4cce742011-02-24 15:25:24 +00001981/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1982 being compact stand-ins for Thr*'s. Use these functions to map
1983 between them. */
1984static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1985static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1986
sewardje4cce742011-02-24 15:25:24 +00001987__attribute__((noreturn))
1988static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1989{
1990 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00001991 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001992 "\n"
1993 "Helgrind: cannot continue, run aborted: too many threads.\n"
1994 "Sorry. Helgrind can only handle programs that create\n"
1995 "%'llu or fewer threads over their entire lifetime.\n"
1996 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001997 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001998 } else {
florian6bf37262012-10-21 03:23:36 +00001999 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00002000 "\n"
2001 "Helgrind: cannot continue, run aborted: too many\n"
2002 "synchronisation events. Sorry. Helgrind can only handle\n"
2003 "programs which perform %'llu or fewer\n"
2004 "inter-thread synchronisation events (locks, unlocks, etc).\n"
2005 "\n";
2006 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
2007 }
2008 VG_(exit)(1);
2009 /*NOTREACHED*/
2010 tl_assert(0); /*wtf?!*/
2011}
2012
2013
philippec3508652015-03-28 12:01:58 +00002014/* The dead thread (ThrID, actually) tables. A thread may only be
sewardjffce8152011-06-24 10:09:41 +00002015 listed here if we have been notified thereof by libhb_async_exit.
2016 New entries are added at the end. The order isn't important, but
philippec3508652015-03-28 12:01:58 +00002017 the ThrID values must be unique.
2018 verydead_thread_table_not_pruned lists the identity of the threads
2019 that died since the previous round of pruning.
2020 Once pruning is done, these ThrIDs are added to verydead_thread_table.
2021 We don't actually need to keep the set of threads that have ever died --
sewardjffce8152011-06-24 10:09:41 +00002022 only the threads that have died since the previous round of
2023 pruning. But it's useful for sanity check purposes to keep the
2024 entire set, so we do. */
philippec3508652015-03-28 12:01:58 +00002025static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
sewardjffce8152011-06-24 10:09:41 +00002026static XArray* /* of ThrID */ verydead_thread_table = NULL;
2027
2028/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00002029static Int cmp__ThrID ( const void* v1, const void* v2 ) {
2030 ThrID id1 = *(const ThrID*)v1;
2031 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00002032 if (id1 < id2) return -1;
2033 if (id1 > id2) return 1;
2034 return 0;
2035}
2036
philippec3508652015-03-28 12:01:58 +00002037static void verydead_thread_tables_init ( void )
sewardjffce8152011-06-24 10:09:41 +00002038{
2039 tl_assert(!verydead_thread_table);
philippec3508652015-03-28 12:01:58 +00002040 tl_assert(!verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002041 verydead_thread_table
2042 = VG_(newXA)( HG_(zalloc),
2043 "libhb.verydead_thread_table_init.1",
2044 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00002045 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
philippec3508652015-03-28 12:01:58 +00002046 verydead_thread_table_not_pruned
2047 = VG_(newXA)( HG_(zalloc),
2048 "libhb.verydead_thread_table_init.2",
2049 HG_(free), sizeof(ThrID) );
2050 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
sewardjffce8152011-06-24 10:09:41 +00002051}
2052
philippec3508652015-03-28 12:01:58 +00002053static void verydead_thread_table_sort_and_check (XArray* thrids)
2054{
2055 UWord i;
2056
2057 VG_(sortXA)( thrids );
2058 /* Sanity check: check for unique ThrID values. */
2059 UWord nBT = VG_(sizeXA)( thrids );
2060 if (nBT > 0) {
2061 ThrID thrid1, thrid2;
2062 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
2063 for (i = 1; i < nBT; i++) {
2064 thrid1 = thrid2;
2065 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
2066 tl_assert(thrid1 < thrid2);
2067 }
2068 }
2069 /* Ok, so the dead thread table 'thrids' has unique, in-order keys. */
2070}
sewardjf98e1c02008-10-25 16:22:41 +00002071
2072/* A VTS contains .ts, its vector clock, and also .id, a field to hold
2073 a backlink for the caller's convenience. Since we have no idea
2074 what to set that to in the library, it always gets set to
2075 VtsID_INVALID. */
2076typedef
2077 struct {
sewardj7aa38a92011-02-27 23:04:12 +00002078 VtsID id;
2079 UInt usedTS;
2080 UInt sizeTS;
2081 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00002082 }
2083 VTS;
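/* For example (valid ThrIDs start at 1024 -- see the assertion in
   VTS__cmpLEQ below): a VTS with usedTS == 2 and
   ts == [ {thrid 1024, tym 3}, {thrid 1027, tym 1} ] says "thread
   1024 is at local time 3, thread 1027 at time 1, and every other
   thread implicitly at time 0".  Entries are kept sorted by thrid
   with no zero timestamps stored, which is exactly what is_sane_VTS
   (below) checks. */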
2084
sewardj7aa38a92011-02-27 23:04:12 +00002085/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00002086static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00002087
sewardjffce8152011-06-24 10:09:41 +00002088/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00002089 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00002090static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002091
sewardjffce8152011-06-24 10:09:41 +00002092/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
2093 array is sized exactly to hold the number of required elements.
2094 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
2095 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00002096static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00002097
sewardjf98e1c02008-10-25 16:22:41 +00002098/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00002099static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002100
sewardj7aa38a92011-02-27 23:04:12 +00002101/* Create a new singleton VTS in 'out'. Caller must have
2102 pre-allocated 'out' sufficiently big to hold the result in all
2103 possible cases. */
2104static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00002105
sewardj7aa38a92011-02-27 23:04:12 +00002106/* Create in 'out' a VTS which is the same as 'vts' except with
2107 vts[me]++, so to speak. Caller must have pre-allocated 'out'
2108 sufficiently big to hold the result in all possible cases. */
2109static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002110
sewardj7aa38a92011-02-27 23:04:12 +00002111/* Create in 'out' a VTS which is the join (max) of 'a' and
2112 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
2113 the result in all possible cases. */
2114static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002115
sewardj23f12002009-07-24 08:45:08 +00002116/* Compute the partial ordering relation of the two args. Although we
2117 could be completely general and return an enumeration value (EQ,
2118 LT, GT, UN), in fact we only need LEQ, and so we may as well
2119 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00002120
sewardje4cce742011-02-24 15:25:24 +00002121 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
2122 invalid ThrID). In the latter case, the returned ThrID indicates
2123 a point at which LEQ fails to hold. There may be more
2124 than one such point, but we only care about seeing one of them, not
2125 all of them. This rather strange convention is used because
2126 sometimes we want to know the actual index at which they first
2127 differ. */
2128static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002129
2130/* Compute an arbitrary structural (total) ordering on the two args,
2131 based on their VCs, so they can be looked up in a table, tree, etc.
2132 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00002133static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00002134
florianb28fe892014-10-28 20:52:07 +00002135/* Debugging only. Display the given VTS. */
2136static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00002137
2138/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00002139static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002140
sewardjffce8152011-06-24 10:09:41 +00002141/* Notify the VTS machinery that a thread has been declared
2142 comprehensively dead: that is, it has done an async exit AND it has
2143 been joined with. This should ensure that its local clocks (.viR
2144 and .viW) will never again change, and so all mentions of this
2145 thread from all VTSs in the system may be removed. */
2146static void VTS__declare_thread_very_dead ( Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00002147
2148/*--------------- to do with Vector Timestamps ---------------*/
2149
sewardjf98e1c02008-10-25 16:22:41 +00002150static Bool is_sane_VTS ( VTS* vts )
2151{
2152 UWord i, n;
2153 ScalarTS *st1, *st2;
2154 if (!vts) return False;
sewardj555fc572011-02-27 23:39:53 +00002155 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00002156 n = vts->usedTS;
2157 if (n == 1) {
2158 st1 = &vts->ts[0];
2159 if (st1->tym == 0)
2160 return False;
2161 }
2162 else
sewardjf98e1c02008-10-25 16:22:41 +00002163 if (n >= 2) {
2164 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002165 st1 = &vts->ts[i];
2166 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00002167 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002168 return False;
2169 if (st1->tym == 0 || st2->tym == 0)
2170 return False;
2171 }
2172 }
2173 return True;
2174}
2175
2176
sewardj7aa38a92011-02-27 23:04:12 +00002177/* Create a new, empty VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002178*/
florian6bd9dc12012-11-23 16:17:43 +00002179static VTS* VTS__new ( const HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00002180{
sewardj7aa38a92011-02-27 23:04:12 +00002181 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
2182 tl_assert(vts->usedTS == 0);
2183 vts->sizeTS = sizeTS;
2184 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00002185 return vts;
2186}
2187
sewardj7aa38a92011-02-27 23:04:12 +00002188/* Clone this VTS.
2189*/
florian6bd9dc12012-11-23 16:17:43 +00002190static VTS* VTS__clone ( const HChar* who, VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002191{
2192 tl_assert(vts);
2193 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2194 UInt nTS = vts->usedTS;
2195 VTS* clone = VTS__new(who, nTS);
2196 clone->id = vts->id;
2197 clone->sizeTS = nTS;
2198 clone->usedTS = nTS;
2199 UInt i;
2200 for (i = 0; i < nTS; i++) {
2201 clone->ts[i] = vts->ts[i];
2202 }
2203 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2204 return clone;
2205}
2206
sewardjf98e1c02008-10-25 16:22:41 +00002207
sewardjffce8152011-06-24 10:09:41 +00002208/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
2209 must be in strictly increasing order. We could obviously do this
2210 much more efficiently (in linear time) if necessary.
2211*/
florian6bd9dc12012-11-23 16:17:43 +00002212static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
sewardjffce8152011-06-24 10:09:41 +00002213{
2214 UInt i, j;
2215 tl_assert(vts);
2216 tl_assert(thridsToDel);
2217 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2218 UInt nTS = vts->usedTS;
2219 /* Figure out how many ScalarTSs will remain in the output. */
2220 UInt nReq = nTS;
2221 for (i = 0; i < nTS; i++) {
2222 ThrID thrid = vts->ts[i].thrid;
2223 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2224 nReq--;
2225 }
2226 tl_assert(nReq <= nTS);
2227 /* Copy the ones that will remain. */
2228 VTS* res = VTS__new(who, nReq);
2229 j = 0;
2230 for (i = 0; i < nTS; i++) {
2231 ThrID thrid = vts->ts[i].thrid;
2232 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2233 continue;
2234 res->ts[j++] = vts->ts[i];
2235 }
2236 tl_assert(j == nReq);
2237 tl_assert(j == res->sizeTS);
2238 res->usedTS = j;
2239 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2240 return res;
2241}
2242
2243
sewardjf98e1c02008-10-25 16:22:41 +00002244/* Delete this VTS in its entirety.
2245*/
sewardj7aa38a92011-02-27 23:04:12 +00002246static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002247{
2248 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002249 tl_assert(vts->usedTS <= vts->sizeTS);
2250 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002251 HG_(free)(vts);
2252}
2253
2254
2255/* Create a new singleton VTS in 'out'.
2256*/
sewardj7aa38a92011-02-27 23:04:12 +00002257static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2258{
sewardjf98e1c02008-10-25 16:22:41 +00002259 tl_assert(thr);
2260 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002261 tl_assert(out);
2262 tl_assert(out->usedTS == 0);
2263 tl_assert(out->sizeTS >= 1);
2264 UInt hi = out->usedTS++;
2265 out->ts[hi].thrid = Thr__to_ThrID(thr);
2266 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002267}
2268
2269
2270/* Create in 'out' a copy of 'vts' in which vts[me]++, so to speak.
2271 'vts' itself is not modified.
2272*/
sewardj7aa38a92011-02-27 23:04:12 +00002273static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002274{
sewardj7aa38a92011-02-27 23:04:12 +00002275 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002276 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002277 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002278
2279 stats__vts__tick++;
2280
sewardj7aa38a92011-02-27 23:04:12 +00002281 tl_assert(out);
2282 tl_assert(out->usedTS == 0);
2283 if (vts->usedTS >= ThrID_MAX_VALID)
2284 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2285 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2286
sewardjf98e1c02008-10-25 16:22:41 +00002287 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002288 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002289 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002290 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002291
sewardj555fc572011-02-27 23:39:53 +00002292 /* Copy all entries which precede 'me'. */
2293 for (i = 0; i < n; i++) {
2294 ScalarTS* here = &vts->ts[i];
2295 if (UNLIKELY(here->thrid >= me_thrid))
2296 break;
2297 UInt hi = out->usedTS++;
2298 out->ts[hi] = *here;
2299 }
2300
2301 /* 'i' now indicates the next entry to copy, if any.
2302 There are 3 possibilities:
2303 (a) there is no next entry (we used them all up already):
2304 add (me_thrid,1) to the output, and quit
2305 (b) there is a next entry, and its thrid > me_thrid:
2306 add (me_thrid,1) to the output, then copy the remaining entries
2307 (c) there is a next entry, and its thrid == me_thrid:
2308 copy it to the output but increment its timestamp value.
2309 Then copy the remaining entries. (c) is the common case.
2310 */
2311 tl_assert(i >= 0 && i <= n);
2312 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002313 UInt hi = out->usedTS++;
2314 out->ts[hi].thrid = me_thrid;
2315 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002316 } else {
2317 /* cases (b) and (c) */
2318 ScalarTS* here = &vts->ts[i];
2319 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002320 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002321 /* We're hosed. We have to stop. */
2322 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2323 }
sewardj7aa38a92011-02-27 23:04:12 +00002324 UInt hi = out->usedTS++;
2325 out->ts[hi].thrid = here->thrid;
2326 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002327 i++;
sewardj555fc572011-02-27 23:39:53 +00002328 found = True;
2329 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002330 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002331 out->ts[hi].thrid = me_thrid;
2332 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002333 }
sewardj555fc572011-02-27 23:39:53 +00002334 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002335 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002336 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002337 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002338 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002339 }
2340 }
sewardj555fc572011-02-27 23:39:53 +00002341
sewardj7aa38a92011-02-27 23:04:12 +00002342 tl_assert(is_sane_VTS(out));
2343 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2344 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002345}
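/* Worked example of the three cases above: with
   vts == [ {1024,3}, {1030,7} ],
   a tick by thread 1040 gives [ {1024,3}, {1030,7}, {1040,1} ] (a),
   a tick by thread 1025 gives [ {1024,3}, {1025,1}, {1030,7} ] (b),
   a tick by thread 1030 gives [ {1024,3}, {1030,8} ] (c). */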
2346
2347
2348/* Create in 'out' a VTS constructed as the join (max) of the 2 args.
2349 Neither arg is modified.
2350*/
sewardj7aa38a92011-02-27 23:04:12 +00002351static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002352{
sewardj7aa38a92011-02-27 23:04:12 +00002353 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002354 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002355 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002356 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002357
sewardjc8028ad2010-05-05 09:34:42 +00002358 stats__vts__join++;
2359
sewardj7aa38a92011-02-27 23:04:12 +00002360 tl_assert(a);
2361 tl_assert(b);
2362 useda = a->usedTS;
2363 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002364
sewardj7aa38a92011-02-27 23:04:12 +00002365 tl_assert(out);
2366 tl_assert(out->usedTS == 0);
2367 /* overly conservative test, but doing better involves comparing
2368 the two VTSs, which we don't want to do at this point. */
2369 if (useda + usedb >= ThrID_MAX_VALID)
2370 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2371 tl_assert(out->sizeTS >= useda + usedb);
2372
sewardjf98e1c02008-10-25 16:22:41 +00002373 ia = ib = 0;
2374
2375 while (1) {
2376
sewardje4cce742011-02-24 15:25:24 +00002377 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2378 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002379 occurring in either a or b, and tyma/b are the relevant
2380 scalar timestamps, taking into account implicit zeroes. */
2381 tl_assert(ia >= 0 && ia <= useda);
2382 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002383
njn4c245e52009-03-15 23:25:38 +00002384 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002385 /* both empty - done */
2386 break;
njn4c245e52009-03-15 23:25:38 +00002387
2388 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002389 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002390 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002391 thrid = tmpb->thrid;
2392 tyma = 0;
2393 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002394 ib++;
njn4c245e52009-03-15 23:25:38 +00002395
2396 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002397 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002398 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002399 thrid = tmpa->thrid;
2400 tyma = tmpa->tym;
2401 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002402 ia++;
njn4c245e52009-03-15 23:25:38 +00002403
2404 } else {
sewardje4cce742011-02-24 15:25:24 +00002405 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002406 ScalarTS* tmpa = &a->ts[ia];
2407 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002408 if (tmpa->thrid < tmpb->thrid) {
2409 /* a has the lowest unconsidered ThrID */
2410 thrid = tmpa->thrid;
2411 tyma = tmpa->tym;
2412 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002413 ia++;
sewardje4cce742011-02-24 15:25:24 +00002414 } else if (tmpa->thrid > tmpb->thrid) {
2415 /* b has the lowest unconsidered ThrID */
2416 thrid = tmpb->thrid;
2417 tyma = 0;
2418 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002419 ib++;
2420 } else {
sewardje4cce742011-02-24 15:25:24 +00002421 /* they both next mention the same ThrID */
2422 tl_assert(tmpa->thrid == tmpb->thrid);
2423 thrid = tmpa->thrid; /* == tmpb->thrid */
2424 tyma = tmpa->tym;
2425 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002426 ia++;
2427 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002428 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002429 }
2430 }
2431
2432 /* having laboriously determined (thr, tyma, tymb), do something
2433 useful with it. */
2434 tymMax = tyma > tymb ? tyma : tymb;
2435 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002436 UInt hi = out->usedTS++;
2437 out->ts[hi].thrid = thrid;
2438 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002439 }
2440
2441 }
2442
sewardj7aa38a92011-02-27 23:04:12 +00002443 tl_assert(is_sane_VTS(out));
2444 tl_assert(out->usedTS <= out->sizeTS);
2445 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002446}
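/* Worked example: joining a == [ {1024,3}, {1025,1} ] with
   b == [ {1024,2}, {1030,4} ] enumerates thrids 1024, 1025, 1030 in
   order with implicit zeroes filled in, takes the max at each, and
   produces [ {1024,3}, {1025,1}, {1030,4} ].  Here ncommon == 1
   (thrid 1024 appears in both), so out->usedTS == 2 + 2 - 1 == 3,
   as the final assertion demands. */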
2447
2448
sewardje4cce742011-02-24 15:25:24 +00002449/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2450 they are, or the first ThrID for which they are not (no valid ThrID
2451 has the value zero). This rather strange convention is used
2452 because sometimes we want to know the actual index at which they
2453 first differ. */
2454static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002455{
sewardj23f12002009-07-24 08:45:08 +00002456 Word ia, ib, useda, usedb;
2457 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002458
sewardjc8028ad2010-05-05 09:34:42 +00002459 stats__vts__cmpLEQ++;
2460
sewardj7aa38a92011-02-27 23:04:12 +00002461 tl_assert(a);
2462 tl_assert(b);
2463 useda = a->usedTS;
2464 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002465
2466 ia = ib = 0;
2467
2468 while (1) {
2469
njn4c245e52009-03-15 23:25:38 +00002470 /* This logic is to enumerate doubles (tyma, tymb) drawn
2471 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002472 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002473 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002474
sewardjf98e1c02008-10-25 16:22:41 +00002475 tl_assert(ia >= 0 && ia <= useda);
2476 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002477
njn4c245e52009-03-15 23:25:38 +00002478 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002479 /* both empty - done */
2480 break;
njn4c245e52009-03-15 23:25:38 +00002481
2482 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002483 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002484 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002485 tyma = 0;
2486 tymb = tmpb->tym;
2487 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002488 ib++;
njn4c245e52009-03-15 23:25:38 +00002489
2490 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002491 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002492 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002493 tyma = tmpa->tym;
2494 thrid = tmpa->thrid;
2495 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002496 ia++;
njn4c245e52009-03-15 23:25:38 +00002497
2498 } else {
sewardje4cce742011-02-24 15:25:24 +00002499 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002500 ScalarTS* tmpa = &a->ts[ia];
2501 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002502 if (tmpa->thrid < tmpb->thrid) {
2503 /* a has the lowest unconsidered ThrID */
2504 tyma = tmpa->tym;
2505 thrid = tmpa->thrid;
2506 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002507 ia++;
2508 }
2509 else
sewardje4cce742011-02-24 15:25:24 +00002510 if (tmpa->thrid > tmpb->thrid) {
2511 /* b has the lowest unconsidered ThrID */
2512 tyma = 0;
2513 tymb = tmpb->tym;
2514 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002515 ib++;
2516 } else {
sewardje4cce742011-02-24 15:25:24 +00002517 /* they both next mention the same ThrID */
2518 tl_assert(tmpa->thrid == tmpb->thrid);
2519 tyma = tmpa->tym;
2520 thrid = tmpa->thrid;
2521 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002522 ia++;
2523 ib++;
2524 }
2525 }
2526
njn4c245e52009-03-15 23:25:38 +00002527 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002528 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002529 if (tyma > tymb) {
2530 /* not LEQ at this index. Quit, since the answer is
2531 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002532 tl_assert(thrid >= 1024);
2533 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002534 }
sewardjf98e1c02008-10-25 16:22:41 +00002535 }
2536
sewardje4cce742011-02-24 15:25:24 +00002537 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002538}
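/* Worked example:
   VTS__cmpLEQ( [ {1024,2} ], [ {1024,3}, {1030,1} ] ) returns 0,
   since 2 <= 3 and the implicit 0 <= 1.  With the arguments swapped
   it fails at thrid 1024 (3 > 2) and returns 1024.  The pair
   [ {1024,2} ] and [ {1030,1} ] is incomparable: one direction fails
   at 1024 (2 > implicit 0), the other at 1030 (1 > 0). */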
2539
2540
2541/* Compute an arbitrary structural (total) ordering on the two args,
2542 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002543 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2544 performance critical so there is some effort expended to make it
2545 as fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002546*/
2547Word VTS__cmp_structural ( VTS* a, VTS* b )
2548{
2549 /* We just need to generate an arbitrary total ordering based on
2550 a->ts and b->ts. Preferably do it in a way which comes across likely
2551 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002552 Word i;
2553 Word useda = 0, usedb = 0;
2554 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002555
sewardjc8028ad2010-05-05 09:34:42 +00002556 stats__vts__cmp_structural++;
2557
2558 tl_assert(a);
2559 tl_assert(b);
2560
sewardj7aa38a92011-02-27 23:04:12 +00002561 ctsa = &a->ts[0]; useda = a->usedTS;
2562 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002563
2564 if (LIKELY(useda == usedb)) {
2565 ScalarTS *tmpa = NULL, *tmpb = NULL;
2566 stats__vts__cmp_structural_slow++;
2567 /* Same length vectors. Find the first difference, if any, as
2568 fast as possible. */
2569 for (i = 0; i < useda; i++) {
2570 tmpa = &ctsa[i];
2571 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002572 if (LIKELY(tmpa->tym == tmpb->tym
2573 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002574 continue;
2575 else
2576 break;
2577 }
2578 if (UNLIKELY(i == useda)) {
2579 /* They're identical. */
2580 return 0;
2581 } else {
2582 tl_assert(i >= 0 && i < useda);
2583 if (tmpa->tym < tmpb->tym) return -1;
2584 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002585 if (tmpa->thrid < tmpb->thrid) return -1;
2586 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002587 /* we just established them as non-identical, hence: */
2588 }
2589 /*NOTREACHED*/
2590 tl_assert(0);
2591 }
sewardjf98e1c02008-10-25 16:22:41 +00002592
2593 if (useda < usedb) return -1;
2594 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002595 /*NOTREACHED*/
2596 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002597}
2598
2599
florianb28fe892014-10-28 20:52:07 +00002600/* Debugging only. Display the given VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002601*/
florianb28fe892014-10-28 20:52:07 +00002602static void VTS__show ( const VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002603{
sewardjf98e1c02008-10-25 16:22:41 +00002604 Word i, n;
florian4367abe2015-02-28 09:22:09 +00002605 tl_assert(vts);
florianb28fe892014-10-28 20:52:07 +00002606
2607 VG_(printf)("[");
sewardj7aa38a92011-02-27 23:04:12 +00002608 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002609 for (i = 0; i < n; i++) {
florianb28fe892014-10-28 20:52:07 +00002610 const ScalarTS *st = &vts->ts[i];
2611 VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002612 }
florianb28fe892014-10-28 20:52:07 +00002613 VG_(printf)("]");
sewardjf98e1c02008-10-25 16:22:41 +00002614}
2615
2616
2617/* Debugging only. Return vts[index], so to speak.
2618*/
sewardj7aa38a92011-02-27 23:04:12 +00002619ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2620{
sewardjf98e1c02008-10-25 16:22:41 +00002621 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002622 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002623 stats__vts__indexat_slow++;
florian4367abe2015-02-28 09:22:09 +00002624 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002625 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002626 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002627 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002628 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002629 return st->tym;
2630 }
2631 return 0;
2632}
2633
2634
sewardjffce8152011-06-24 10:09:41 +00002635/* See comment on prototype above.
2636*/
2637static void VTS__declare_thread_very_dead ( Thr* thr )
2638{
2639 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2640
2641 tl_assert(thr->llexit_done);
2642 tl_assert(thr->joinedwith_done);
2643
2644 ThrID nyu;
2645 nyu = Thr__to_ThrID(thr);
philippec3508652015-03-28 12:01:58 +00002646 VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
sewardjffce8152011-06-24 10:09:41 +00002647
2648 /* We can only get here if we're assured that we'll never again
2649 need to look at this thread's ::viR or ::viW. Set them to
2650 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2651 mostly so that we don't wind up pruning them (as that would be
2652 nonsensical: the only interesting ScalarTS entry for a dead
2653 thread is its own index, and the pruning will remove that). */
2654 VtsID__rcdec(thr->viR);
2655 VtsID__rcdec(thr->viW);
2656 thr->viR = VtsID_INVALID;
2657 thr->viW = VtsID_INVALID;
2658}
2659
2660
sewardjf98e1c02008-10-25 16:22:41 +00002661/////////////////////////////////////////////////////////////////
2662/////////////////////////////////////////////////////////////////
2663// //
2664// SECTION END vts primitives //
2665// //
2666/////////////////////////////////////////////////////////////////
2667/////////////////////////////////////////////////////////////////
2668
2669
2670
2671/////////////////////////////////////////////////////////////////
2672/////////////////////////////////////////////////////////////////
2673// //
2674// SECTION BEGIN main library //
2675// //
2676/////////////////////////////////////////////////////////////////
2677/////////////////////////////////////////////////////////////////
2678
2679
2680/////////////////////////////////////////////////////////
2681// //
2682// VTS set //
2683// //
2684/////////////////////////////////////////////////////////
2685
sewardjffce8152011-06-24 10:09:41 +00002686static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002687
2688static void vts_set_init ( void )
2689{
2690 tl_assert(!vts_set);
2691 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2692 HG_(free),
2693 (Word(*)(UWord,UWord))VTS__cmp_structural );
sewardjf98e1c02008-10-25 16:22:41 +00002694}
2695
sewardj7aa38a92011-02-27 23:04:12 +00002696/* Given a VTS, look in vts_set to see if we already have a
2697 structurally identical one. If yes, return the pair (True, pointer
2698 to the existing one). If no, clone this one, add the clone to the
2699 set, and return (False, pointer to the clone). */
2700static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002701{
2702 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002703 stats__vts_set__focaa++;
2704 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002705 /* lookup cand (by value) */
2706 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2707 /* found it */
2708 tl_assert(valW == 0);
2709 /* if this fails, cand (by ref) was already present (!) */
2710 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002711 *res = (VTS*)keyW;
2712 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002713 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002714 /* not present. Clone, add and return address of clone. */
2715 stats__vts_set__focaa_a++;
2716 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2717 tl_assert(clone != cand);
2718 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2719 *res = clone;
2720 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002721 }
2722}
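/* Note on the design: this is hash-consing.  Candidate VTSs are built
   in scratch storage and the canonical, shared copy lives in vts_set,
   so pointer equality on canonical copies coincides with structural
   equality.  A typical call sequence (a sketch; 'scratch' stands for
   a hypothetical caller-side VTS):

      VTS* canonical = NULL;
      Bool already   = vts_set__find__or__clone_and_add( &canonical,
                                                         scratch );
      // 'canonical' is now the shared copy; 'scratch' can be reused.

   Each distinct VTS is thus stored once, however many shadow values
   reference it. */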
2723
2724
2725/////////////////////////////////////////////////////////
2726// //
2727// VTS table //
2728// //
2729/////////////////////////////////////////////////////////
2730
2731static void VtsID__invalidate_caches ( void ); /* fwds */
2732
2733/* A type to hold VTS table entries. Invariants:
2734 If .vts == NULL, then this entry is not in use, so:
2735 - .rc == 0
2736 - this entry is on the freelist (unfortunately, does not imply
philippea1ac2f42015-05-01 17:12:00 +00002737 any constraints on value for u.freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002738 If .vts != NULL, then this entry is in use:
2739 - .vts is findable in vts_set
2740 - .vts->id == this entry number
2741 - no specific value for .rc (even 0 is OK)
philippea1ac2f42015-05-01 17:12:00 +00002742 - this entry is not on freelist, so u.freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002743*/
2744typedef
2745 struct {
2746 VTS* vts; /* vts, in vts_set */
2747 UWord rc; /* reference count - enough for entire aspace */
philippea1ac2f42015-05-01 17:12:00 +00002748 union {
2749 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2750 VtsID remap; /* used only during pruning, for used entries */
2751 } u;
2752 /* u.freelink only used when vts == NULL,
2753 u.remap only used when vts != NULL, during pruning. */
sewardjf98e1c02008-10-25 16:22:41 +00002754 }
2755 VtsTE;
2756
2757/* The VTS table. */
2758static XArray* /* of VtsTE */ vts_tab = NULL;
2759
2760/* An index into the VTS table, indicating the start of the list of
2761 free (available for use) entries. If the list is empty, this is
2762 VtsID_INVALID. */
2763static VtsID vts_tab_freelist = VtsID_INVALID;
2764
2765/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2766 vts_tab equals or exceeds this size. After GC, the value here is
2767 set appropriately so as to check for the next GC point. */
2768static Word vts_next_GC_at = 1000;
2769
2770static void vts_tab_init ( void )
2771{
florian91ed8cc2014-09-15 18:50:17 +00002772 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2773 HG_(free), sizeof(VtsTE) );
2774 vts_tab_freelist = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002775}
2776
2777/* Add ii to the free list, checking that it looks out-of-use. */
2778static void add_to_free_list ( VtsID ii )
2779{
2780 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2781 tl_assert(ie->vts == NULL);
2782 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002783 tl_assert(ie->u.freelink == VtsID_INVALID);
2784 ie->u.freelink = vts_tab_freelist;
sewardjf98e1c02008-10-25 16:22:41 +00002785 vts_tab_freelist = ii;
2786}
2787
2788/* Get an entry from the free list. This will return VtsID_INVALID if
2789 the free list is empty. */
2790static VtsID get_from_free_list ( void )
2791{
2792 VtsID ii;
2793 VtsTE* ie;
2794 if (vts_tab_freelist == VtsID_INVALID)
2795 return VtsID_INVALID;
2796 ii = vts_tab_freelist;
2797 ie = VG_(indexXA)( vts_tab, ii );
2798 tl_assert(ie->vts == NULL);
2799 tl_assert(ie->rc == 0);
philippea1ac2f42015-05-01 17:12:00 +00002800 vts_tab_freelist = ie->u.freelink;
sewardjf98e1c02008-10-25 16:22:41 +00002801 return ii;
2802}
2803
2804/* Produce a new VtsID that can be used, either by getting it from
2805 the freelist, or, if that is empty, by expanding vts_tab. */
2806static VtsID get_new_VtsID ( void )
2807{
2808 VtsID ii;
2809 VtsTE te;
2810 ii = get_from_free_list();
2811 if (ii != VtsID_INVALID)
2812 return ii;
2813 te.vts = NULL;
2814 te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002815 te.u.freelink = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002816 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2817 return ii;
2818}
2819
2820
2821/* Indirect callback from lib_zsm. */
2822static void VtsID__rcinc ( VtsID ii )
2823{
2824 VtsTE* ie;
2825 /* VG_(indexXA) does a range check for us */
2826 ie = VG_(indexXA)( vts_tab, ii );
2827 tl_assert(ie->vts); /* else it's not in use */
2828 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2829 tl_assert(ie->vts->id == ii);
2830 ie->rc++;
2831}
2832
2833/* Indirect callback from lib_zsm. */
2834static void VtsID__rcdec ( VtsID ii )
2835{
2836 VtsTE* ie;
2837 /* VG_(indexXA) does a range check for us */
2838 ie = VG_(indexXA)( vts_tab, ii );
2839 tl_assert(ie->vts); /* else it's not in use */
2840 tl_assert(ie->rc > 0); /* else RC snafu */
2841 tl_assert(ie->vts->id == ii);
2842 ie->rc--;
2843}
2844
2845
sewardj7aa38a92011-02-27 23:04:12 +00002846/* Look up 'cand' in our collection of VTSs. If present, return the
2847 VtsID for the pre-existing version. If not present, clone it, add
2848 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2849 it, and return that. */
2850static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002851{
sewardj7aa38a92011-02-27 23:04:12 +00002852 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002853 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002854 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2855 tl_assert(in_tab);
2856 if (already_have) {
2857 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002858 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002859 tl_assert(in_tab->id != VtsID_INVALID);
2860 ie = VG_(indexXA)( vts_tab, in_tab->id );
2861 tl_assert(ie->vts == in_tab);
2862 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002863 } else {
2864 VtsID ii = get_new_VtsID();
2865 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002866 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002867 ie->rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002868 ie->u.freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002869 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002870 return ii;
2871 }
2872}
2873
2874
florian6bd9dc12012-11-23 16:17:43 +00002875static void show_vts_stats ( const HChar* caller )
sewardjf98e1c02008-10-25 16:22:41 +00002876{
2877 UWord nSet, nTab, nLive;
2878 ULong totrc;
2879 UWord n, i;
2880 nSet = VG_(sizeFM)( vts_set );
2881 nTab = VG_(sizeXA)( vts_tab );
2882 totrc = 0;
2883 nLive = 0;
2884 n = VG_(sizeXA)( vts_tab );
2885 for (i = 0; i < n; i++) {
2886 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2887 if (ie->vts) {
2888 nLive++;
2889 totrc += (ULong)ie->rc;
2890 } else {
2891 tl_assert(ie->rc == 0);
2892 }
2893 }
2894 VG_(printf)(" show_vts_stats %s\n", caller);
2895 VG_(printf)(" vts_tab size %4lu\n", nTab);
2896 VG_(printf)(" vts_tab live %4lu\n", nLive);
2897 VG_(printf)(" vts_set size %4lu\n", nSet);
2898 VG_(printf)(" total rc %4llu\n", totrc);
2899}
2900
sewardjffce8152011-06-24 10:09:41 +00002901
2902/* --- Helpers for VtsID pruning --- */
2903
2904static
2905void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2906 /*MOD*/XArray* /* of VtsTE */ new_tab,
2907 VtsID* ii )
2908{
2909 VtsTE *old_te, *new_te;
2910 VtsID old_id, new_id;
2911 /* We're relying here on VG_(indexXA)'s range checking to assert on
2912 any stupid values, in particular *ii == VtsID_INVALID. */
2913 old_id = *ii;
2914 old_te = VG_(indexXA)( old_tab, old_id );
2915 old_te->rc--;
philippea1ac2f42015-05-01 17:12:00 +00002916 new_id = old_te->u.remap;
sewardjffce8152011-06-24 10:09:41 +00002917 new_te = VG_(indexXA)( new_tab, new_id );
2918 new_te->rc++;
2919 *ii = new_id;
2920}
2921
2922static
2923void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2924 /*MOD*/XArray* /* of VtsTE */ new_tab,
2925 SVal* s )
2926{
2927 SVal old_sv, new_sv;
2928 old_sv = *s;
2929 if (SVal__isC(old_sv)) {
2930 VtsID rMin, wMin;
2931 rMin = SVal__unC_Rmin(old_sv);
2932 wMin = SVal__unC_Wmin(old_sv);
2933 remap_VtsID( old_tab, new_tab, &rMin );
2934 remap_VtsID( old_tab, new_tab, &wMin );
2935 new_sv = SVal__mkC( rMin, wMin );
2936 *s = new_sv;
2937 }
2938}
2939
2940
sewardjf98e1c02008-10-25 16:22:41 +00002941/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002942__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002943static void vts_tab__do_GC ( Bool show_stats )
2944{
2945 UWord i, nTab, nLive, nFreed;
2946
sewardjffce8152011-06-24 10:09:41 +00002947 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002948 /* check this is actually necessary. */
2949 tl_assert(vts_tab_freelist == VtsID_INVALID);
2950
2951 /* empty the caches for partial order checks and binary joins. We
2952 could do better and prune out the entries to be deleted, but it
2953 ain't worth the hassle. */
2954 VtsID__invalidate_caches();
2955
2956 /* First, make the reference counts up to date. */
2957 zsm_flush_cache();
2958
2959 nTab = VG_(sizeXA)( vts_tab );
2960
2961 if (show_stats) {
2962 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2963 show_vts_stats("before GC");
2964 }
2965
sewardjffce8152011-06-24 10:09:41 +00002966 /* Now we can inspect the entire vts_tab. Any entries with zero
2967 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002968 free list, removed from vts_set, and deleted. */
2969 nFreed = 0;
2970 for (i = 0; i < nTab; i++) {
2971 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002972 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002973 VtsTE* te = VG_(indexXA)( vts_tab, i );
2974 if (te->vts == NULL) {
2975 tl_assert(te->rc == 0);
2976 continue; /* already on the free list (presumably) */
2977 }
2978 if (te->rc > 0)
2979 continue; /* in use */
2980 /* Ok, we got one we can free. */
2981 tl_assert(te->vts->id == i);
2982 /* first, remove it from vts_set. */
2983 present = VG_(delFromFM)( vts_set,
2984 &oldK, &oldV, (UWord)te->vts );
2985 tl_assert(present); /* else it isn't in vts_set ?! */
2986 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2987 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2988 /* now free the VTS itself */
2989 VTS__delete(te->vts);
2990 te->vts = NULL;
2991 /* and finally put this entry on the free list */
philippea1ac2f42015-05-01 17:12:00 +00002992 tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
sewardjf98e1c02008-10-25 16:22:41 +00002993 add_to_free_list( i );
2994 nFreed++;
2995 }
2996
2997 /* Now figure out when the next GC should be. We'll allow the
2998 number of VTSs to double before GCing again. Except of course
2999 that since we can't (or, at least, don't) shrink vts_tab, we
3000      can't set the threshold value smaller than it. */
3001 tl_assert(nFreed <= nTab);
3002 nLive = nTab - nFreed;
3003 tl_assert(nLive >= 0 && nLive <= nTab);
3004 vts_next_GC_at = 2 * nLive;
3005 if (vts_next_GC_at < nTab)
3006 vts_next_GC_at = nTab;
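   /* Worked example of the threshold arithmetic above: with nTab ==
      1000 and nFreed == 700, nLive == 300, so the next GC would
      nominally happen once the table holds 2 * 300 == 600 entries;
      but since vts_tab never shrinks below its current 1000 entries,
      the threshold is clamped up to 1000. */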
3007
3008 if (show_stats) {
3009 show_vts_stats("after GC");
3010      VG_(printf)("<<GC ends, next gc at %lu>>\n", vts_next_GC_at);
3011 }
3012
sewardj5e2ac3b2009-08-11 10:39:25 +00003013 if (VG_(clo_stats)) {
sewardjf98e1c02008-10-25 16:22:41 +00003014 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00003015 VG_(message)(Vg_DebugMsg,
philippef54cb662015-05-10 22:19:31 +00003016 "libhb: VTS GC: #%lu old size %lu live %lu (%2llu%%)\n",
3017 stats__vts_tab_GC,
3018 nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00003019 }
sewardjffce8152011-06-24 10:09:41 +00003020 /* ---------- END VTS GC ---------- */
3021
3022 /* Decide whether to do VTS pruning. We have one of three
3023 settings. */
3024 static UInt pruning_auto_ctr = 0; /* do not make non-static */
3025
3026 Bool do_pruning = False;
3027 switch (HG_(clo_vts_pruning)) {
3028 case 0: /* never */
3029 break;
3030 case 1: /* auto */
3031 do_pruning = (++pruning_auto_ctr % 5) == 0;
3032 break;
3033 case 2: /* always */
3034 do_pruning = True;
3035 break;
3036 default:
3037 tl_assert(0);
3038 }
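   /* Hence, in the "auto" setting, pruning is attempted on every
      fifth GC. */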
3039
3040 /* The rest of this routine only handles pruning, so we can
3041 quit at this point if it is not to be done. */
3042 if (!do_pruning)
3043 return;
philippec3508652015-03-28 12:01:58 +00003044   /* No need to do pruning if no thread died since the last pruning, as
3045 no VtsTE can be pruned. */
3046 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
3047 return;
sewardjffce8152011-06-24 10:09:41 +00003048
3049 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00003050 /* Sort and check the very dead threads that died since the last pruning.
3051 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00003052 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00003053 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003054
3055 /* We will run through the old table, and create a new table and
philippea1ac2f42015-05-01 17:12:00 +00003056 set, at the same time setting the u.remap entries in the old
sewardjffce8152011-06-24 10:09:41 +00003057 table to point to the new entries. Then, visit every VtsID in
3058 the system, and replace all of them with new ones, using the
philippea1ac2f42015-05-01 17:12:00 +00003059 u.remap entries in the old table. Finally, we can delete the old
sewardjffce8152011-06-24 10:09:41 +00003060 table and set. */
3061
3062 XArray* /* of VtsTE */ new_tab
3063 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
3064 HG_(free), sizeof(VtsTE) );
3065
3066 /* WordFM VTS* void */
3067 WordFM* new_set
3068 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
3069 HG_(free),
3070 (Word(*)(UWord,UWord))VTS__cmp_structural );
3071
3072 /* Visit each old VTS. For each one:
3073
3074 * make a pruned version
3075
3076 * search new_set for the pruned version, yielding either
3077 Nothing (not present) or the new VtsID for it.
3078
3079 * if not present, allocate a new VtsID for it, insert (pruned
3080 VTS, new VtsID) in the tree, and set
3081 remap_table[old VtsID] = new VtsID.
3082
3083 * if present, set remap_table[old VtsID] = new VtsID, where
3084 new VtsID was determined by the tree lookup. Then free up
3085 the clone.
3086 */
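   /* In the loop below, "remap_table[old VtsID]" is realised as the
      .u.remap field of the old table entry, vts_tab[old VtsID]. */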
3087
3088 UWord nBeforePruning = 0, nAfterPruning = 0;
3089 UWord nSTSsBefore = 0, nSTSsAfter = 0;
3090 VtsID new_VtsID_ctr = 0;
3091
3092 for (i = 0; i < nTab; i++) {
3093
3094 /* For each old VTS .. */
3095 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
3096 VTS* old_vts = old_te->vts;
sewardjffce8152011-06-24 10:09:41 +00003097
3098 /* Skip it if not in use */
3099 if (old_te->rc == 0) {
3100 tl_assert(old_vts == NULL);
3101 continue;
3102 }
philippea1ac2f42015-05-01 17:12:00 +00003103 tl_assert(old_te->u.remap == VtsID_INVALID);
sewardjffce8152011-06-24 10:09:41 +00003104 tl_assert(old_vts != NULL);
3105 tl_assert(old_vts->id == i);
3106 tl_assert(old_vts->ts != NULL);
3107
3108 /* It is in use. Make a pruned version. */
3109 nBeforePruning++;
3110 nSTSsBefore += old_vts->usedTS;
3111 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00003112 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00003113 tl_assert(new_vts->sizeTS == new_vts->usedTS);
3114 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
3115 == 0x0ddC0ffeeBadF00dULL);
3116
3117 /* Get rid of the old VTS and the tree entry. It's a bit more
3118 complex to incrementally delete the VTSs now than to nuke
3119 them all after we're done, but the upside is that we don't
3120 wind up temporarily storing potentially two complete copies
3121 of each VTS and hence spiking memory use. */
3122 UWord oldK = 0, oldV = 12345;
3123 Bool present = VG_(delFromFM)( vts_set,
3124 &oldK, &oldV, (UWord)old_vts );
3125 tl_assert(present); /* else it isn't in vts_set ?! */
3126 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
3127 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
3128 /* now free the VTS itself */
3129 VTS__delete(old_vts);
3130 old_te->vts = NULL;
3131 old_vts = NULL;
3132
3133 /* NO MENTIONS of old_vts allowed beyond this point. */
3134
3135 /* Ok, we have the pruned copy in new_vts. See if a
3136 structurally identical version is already present in new_set.
3137 If so, delete the one we just made and move on; if not, add
3138 it. */
3139 VTS* identical_version = NULL;
3140 UWord valW = 12345;
3141 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
3142 (UWord)new_vts)) {
3143 // already have it
3144 tl_assert(valW == 0);
3145 tl_assert(identical_version != NULL);
3146 tl_assert(identical_version != new_vts);
3147 VTS__delete(new_vts);
3148 new_vts = identical_version;
3149 tl_assert(new_vts->id != VtsID_INVALID);
3150 } else {
3151 tl_assert(valW == 12345);
3152 tl_assert(identical_version == NULL);
3153 new_vts->id = new_VtsID_ctr++;
3154 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
3155 tl_assert(!b);
3156 VtsTE new_te;
3157 new_te.vts = new_vts;
3158 new_te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00003159 new_te.u.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003160 Word j = VG_(addToXA)( new_tab, &new_te );
3161 tl_assert(j <= i);
3162 tl_assert(j == new_VtsID_ctr - 1);
3163 // stats
3164 nAfterPruning++;
3165 nSTSsAfter += new_vts->usedTS;
3166 }
philippea1ac2f42015-05-01 17:12:00 +00003167 old_te->u.remap = new_vts->id;
sewardjffce8152011-06-24 10:09:41 +00003168
3169 } /* for (i = 0; i < nTab; i++) */
3170
philippec3508652015-03-28 12:01:58 +00003171   /* Move very dead threads from verydead_thread_table_not_pruned to
3172 verydead_thread_table. Sort and check verydead_thread_table
3173 to verify a thread was reported very dead only once. */
3174 {
3175 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
3176
3177 for (i = 0; i < nBT; i++) {
3178 ThrID thrid =
3179 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
3180 VG_(addToXA)( verydead_thread_table, &thrid );
3181 }
3182 verydead_thread_table_sort_and_check (verydead_thread_table);
3183 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
3184 }
3185
sewardjffce8152011-06-24 10:09:41 +00003186 /* At this point, we have:
philippea1ac2f42015-05-01 17:12:00 +00003187 * the old VTS table, with its u.remap entries set,
sewardjffce8152011-06-24 10:09:41 +00003188 and with all .vts == NULL.
3189 * the old VTS tree should be empty, since it and the old VTSs
3190        it contained have been incrementally deleted as we worked
3191 through the old table.
philippea1ac2f42015-05-01 17:12:00 +00003192 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
sewardjffce8152011-06-24 10:09:41 +00003193 == VtsID_INVALID.
3194 * the new VTS tree.
3195 */
3196 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3197
3198 /* Now actually apply the mapping. */
3199 /* Visit all the VtsIDs in the entire system. Where do we expect
3200 to find them?
3201 (a) in shadow memory -- the LineZs and LineFs
3202 (b) in our collection of struct _Thrs.
3203 (c) in our collection of struct _SOs.
3204 Nowhere else, AFAICS. Not in the zsm cache, because that just
3205 got invalidated.
3206
philippea1ac2f42015-05-01 17:12:00 +00003207 Using the u.remap fields in vts_tab, map each old VtsID to a new
sewardjffce8152011-06-24 10:09:41 +00003208 VtsID. For each old VtsID, dec its rc; and for each new one,
3209 inc it. This sets up the new refcounts, and it also gives a
3210 cheap sanity check of the old ones: all old refcounts should be
3211 zero after this operation.
3212 */
3213
3214 /* Do the mappings for (a) above: iterate over the Primary shadow
3215 mem map (WordFM Addr SecMap*). */
3216 UWord secmapW = 0;
3217 VG_(initIterFM)( map_shmem );
3218 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3219 UWord j;
3220 SecMap* sm = (SecMap*)secmapW;
3221 tl_assert(sm->magic == SecMap_MAGIC);
3222 /* Deal with the LineZs */
3223 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3224 LineZ* lineZ = &sm->linesZ[i];
3225 if (lineZ->dict[0] == SVal_INVALID)
3226 continue; /* not in use -- data is in F rep instead */
3227 for (j = 0; j < 4; j++)
3228 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3229 }
3230 /* Deal with the LineFs */
3231 for (i = 0; i < sm->linesF_size; i++) {
3232 LineF* lineF = &sm->linesF[i];
3233 if (!lineF->inUse)
3234 continue;
3235 for (j = 0; j < N_LINE_ARANGE; j++)
3236 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3237 }
3238 }
3239 VG_(doneIterFM)( map_shmem );
3240
3241 /* Do the mappings for (b) above: visit our collection of struct
3242 _Thrs. */
3243 Thread* hgthread = get_admin_threads();
3244 tl_assert(hgthread);
3245 while (hgthread) {
3246 Thr* hbthr = hgthread->hbthr;
3247 tl_assert(hbthr);
3248 /* Threads that are listed in the prunable set have their viR
3249 and viW set to VtsID_INVALID, so we can't mess with them. */
3250 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3251 tl_assert(hbthr->viR == VtsID_INVALID);
3252 tl_assert(hbthr->viW == VtsID_INVALID);
3253 hgthread = hgthread->admin;
3254 continue;
3255 }
3256 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3257 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3258 hgthread = hgthread->admin;
3259 }
3260
3261 /* Do the mappings for (c) above: visit the struct _SOs. */
3262 SO* so = admin_SO;
3263 while (so) {
3264 if (so->viR != VtsID_INVALID)
3265 remap_VtsID( vts_tab, new_tab, &so->viR );
3266 if (so->viW != VtsID_INVALID)
3267 remap_VtsID( vts_tab, new_tab, &so->viW );
3268 so = so->admin_next;
3269 }
3270
3271 /* So, we're nearly done (with this incredibly complex operation).
3272 Check the refcounts for the old VtsIDs all fell to zero, as
3273 expected. Any failure is serious. */
3274 for (i = 0; i < nTab; i++) {
3275 VtsTE* te = VG_(indexXA)( vts_tab, i );
3276 tl_assert(te->vts == NULL);
3277 /* This is the assert proper. Note we're also asserting
philippea1ac2f42015-05-01 17:12:00 +00003278 zeroness for old entries which are unmapped. That's OK. */
sewardjffce8152011-06-24 10:09:41 +00003279 tl_assert(te->rc == 0);
3280 }
3281
3282 /* Install the new table and set. */
3283 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3284 vts_set = new_set;
3285 VG_(deleteXA)( vts_tab );
3286 vts_tab = new_tab;
3287
3288 /* The freelist of vts_tab entries is empty now, because we've
3289 compacted all of the live entries at the low end of the
3290 table. */
3291 vts_tab_freelist = VtsID_INVALID;
3292
3293 /* Sanity check vts_set and vts_tab. */
3294
3295 /* Because all the live entries got slid down to the bottom of vts_tab: */
3296 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3297
3298 /* Assert that the vts_tab and vts_set entries point at each other
3299 in the required way */
3300 UWord wordK = 0, wordV = 0;
3301 VG_(initIterFM)( vts_set );
3302 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3303 tl_assert(wordK != 0);
3304 tl_assert(wordV == 0);
3305 VTS* vts = (VTS*)wordK;
3306 tl_assert(vts->id != VtsID_INVALID);
3307 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3308 tl_assert(te->vts == vts);
3309 }
3310 VG_(doneIterFM)( vts_set );
3311
3312 /* Also iterate over the table, and check each entry is
3313 plausible. */
3314 nTab = VG_(sizeXA)( vts_tab );
3315 for (i = 0; i < nTab; i++) {
3316 VtsTE* te = VG_(indexXA)( vts_tab, i );
3317 tl_assert(te->vts);
3318 tl_assert(te->vts->id == i);
3319 tl_assert(te->rc > 0); /* 'cos we just GC'd */
philippea1ac2f42015-05-01 17:12:00 +00003320 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3321 /* value of te->u.remap not relevant */
sewardjffce8152011-06-24 10:09:41 +00003322 }
3323
3324 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3325 if (VG_(clo_stats)) {
3326 static UInt ctr = 1;
3327 tl_assert(nTab > 0);
3328 VG_(message)(
3329 Vg_DebugMsg,
3330 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3331 "after %lu (avg sz %lu)\n",
3332 ctr++,
3333 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3334 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3335 );
3336 }
sewardjffce8152011-06-24 10:09:41 +00003337 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003338}
3339
3340
3341/////////////////////////////////////////////////////////
3342// //
3343// Vts IDs //
3344// //
3345/////////////////////////////////////////////////////////
3346
3347//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003348/* A temporary, max-sized VTS, used as the destination (first
3349   argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3350static VTS* temp_max_sized_VTS = NULL;
3351
3352//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003353static ULong stats__cmpLEQ_queries = 0;
3354static ULong stats__cmpLEQ_misses = 0;
3355static ULong stats__join2_queries = 0;
3356static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003357
3358static inline UInt ROL32 ( UInt w, Int n ) {
3359 w = (w << n) | (w >> (32-n));
3360 return w;
3361}
3362static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3363 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3364 return hash % nTab;
3365}
3366
sewardj23f12002009-07-24 08:45:08 +00003367#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003368static
sewardj23f12002009-07-24 08:45:08 +00003369 struct { VtsID vi1; VtsID vi2; Bool leq; }
3370 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003371
3372#define N_JOIN2_CACHE 1023
3373static
3374 struct { VtsID vi1; VtsID vi2; VtsID res; }
3375 join2_cache[N_JOIN2_CACHE];
3376
3377static void VtsID__invalidate_caches ( void ) {
3378 Int i;
sewardj23f12002009-07-24 08:45:08 +00003379 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3380 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3381 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3382 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003383 }
3384 for (i = 0; i < N_JOIN2_CACHE; i++) {
3385 join2_cache[i].vi1 = VtsID_INVALID;
3386 join2_cache[i].vi2 = VtsID_INVALID;
3387 join2_cache[i].res = VtsID_INVALID;
3388 }
3389}
3390//////////////////////////
3391
sewardjd52392d2008-11-08 20:36:26 +00003392//static Bool VtsID__is_valid ( VtsID vi ) {
3393// VtsTE* ve;
3394// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3395// return False;
3396// ve = VG_(indexXA)( vts_tab, vi );
3397// if (!ve->vts)
3398// return False;
3399// tl_assert(ve->vts->id == vi);
3400// return True;
3401//}
sewardjf98e1c02008-10-25 16:22:41 +00003402
3403static VTS* VtsID__to_VTS ( VtsID vi ) {
3404 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3405 tl_assert(te->vts);
3406 return te->vts;
3407}
3408
3409static void VtsID__pp ( VtsID vi ) {
sewardjf98e1c02008-10-25 16:22:41 +00003410 VTS* vts = VtsID__to_VTS(vi);
florianb28fe892014-10-28 20:52:07 +00003411 VTS__show( vts );
sewardjf98e1c02008-10-25 16:22:41 +00003412}
3413
3414/* compute partial ordering relation of vi1 and vi2. */
3415__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003416static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003417 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003418 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003419 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003420 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003421 tl_assert(vi1 != vi2);
3422 ////++
sewardj23f12002009-07-24 08:45:08 +00003423 stats__cmpLEQ_queries++;
3424 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3425 if (cmpLEQ_cache[hash].vi1 == vi1
3426 && cmpLEQ_cache[hash].vi2 == vi2)
3427 return cmpLEQ_cache[hash].leq;
3428 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003429 ////--
3430 v1 = VtsID__to_VTS(vi1);
3431 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003432 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003433 ////++
sewardj23f12002009-07-24 08:45:08 +00003434 cmpLEQ_cache[hash].vi1 = vi1;
3435 cmpLEQ_cache[hash].vi2 = vi2;
3436 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003437 ////--
sewardj23f12002009-07-24 08:45:08 +00003438 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003439}
sewardj23f12002009-07-24 08:45:08 +00003440static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3441 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003442}
3443
3444/* compute binary join */
3445__attribute__((noinline))
3446static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3447 UInt hash;
3448 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003449 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003450 //if (vi1 == vi2) return vi1;
3451 tl_assert(vi1 != vi2);
3452 ////++
3453 stats__join2_queries++;
3454 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3455 if (join2_cache[hash].vi1 == vi1
3456 && join2_cache[hash].vi2 == vi2)
3457 return join2_cache[hash].res;
3458 stats__join2_misses++;
3459 ////--
3460 vts1 = VtsID__to_VTS(vi1);
3461 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003462 temp_max_sized_VTS->usedTS = 0;
3463 VTS__join(temp_max_sized_VTS, vts1,vts2);
3464 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003465 ////++
3466 join2_cache[hash].vi1 = vi1;
3467 join2_cache[hash].vi2 = vi2;
3468 join2_cache[hash].res = res;
3469 ////--
3470 return res;
3471}
3472static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003473 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003474}
3475
3476/* create a singleton VTS, namely [thr:1] */
3477static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003478 temp_max_sized_VTS->usedTS = 0;
3479 VTS__singleton(temp_max_sized_VTS, thr,tym);
3480 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003481}
3482
3483/* tick operation, creates value 1 if specified index is absent */
3484static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3485 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003486 temp_max_sized_VTS->usedTS = 0;
3487 VTS__tick(temp_max_sized_VTS, idx,vts);
3488 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003489}
3490
3491/* index into a VTS (only for assertions) */
3492static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3493 VTS* vts = VtsID__to_VTS(vi);
3494 return VTS__indexAt_SLOW( vts, idx );
3495}
3496
sewardj23f12002009-07-24 08:45:08 +00003497/* Assuming that !cmpLEQ(vi1, vi2), find the Thr* of the first (or
3498   any, really) element in vi1 which is pointwise greater-than the
3499   corresponding element in vi2.  Given the assumption, such an
3500   element must exist, and the code asserts as much.  This needs to
3501   be fairly quick since it is called every time a race is detected. */
3502static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3503{
3504 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003505 Thr* diffthr;
3506 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003507 tl_assert(vi1 != vi2);
3508 vts1 = VtsID__to_VTS(vi1);
3509 vts2 = VtsID__to_VTS(vi2);
3510 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003511 diffthrid = VTS__cmpLEQ(vts1, vts2);
3512 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003513 tl_assert(diffthr); /* else they are LEQ ! */
3514 return diffthr;
3515}
3516
3517
3518/////////////////////////////////////////////////////////
3519// //
3520// Filters //
3521// //
3522/////////////////////////////////////////////////////////
3523
sewardj23f12002009-07-24 08:45:08 +00003524/* Forget everything we know -- clear the filter and let everything
3525 through. This needs to be as fast as possible, since it is called
3526 every time the running thread changes, and every time a thread's
3527 vector clocks change, which can be quite frequent. The obvious
3528 fast way to do this is simply to stuff in tags which we know are
3529 not going to match anything, since they're not aligned to the start
3530 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003531static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003532{
3533 UWord i;
3534 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3535 for (i = 0; i < FI_NUM_LINES; i += 8) {
3536 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3537 fi->tags[i+1] = 1;
3538 fi->tags[i+2] = 1;
3539 fi->tags[i+3] = 1;
3540 fi->tags[i+4] = 1;
3541 fi->tags[i+5] = 1;
3542 fi->tags[i+6] = 1;
3543 fi->tags[i+7] = 1;
3544 }
3545 tl_assert(i == FI_NUM_LINES);
3546}
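/* Why 1 can never match: real tags come from FI_GET_TAG, which
   (evidently, from its use in the handlers below) yields
   line-aligned addresses, so every genuine tag has its low bits
   clear; the odd value 1 is therefore guaranteed to miss. */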
3547
3548/* Clearing an arbitrary range in the filter. Unfortunately
3549 we have to do this due to core-supplied new/die-mem events. */
3550
3551static void Filter__clear_1byte ( Filter* fi, Addr a )
3552{
3553 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3554 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3555 FiLine* line = &fi->lines[lineno];
3556 UWord loff = (a - atag) / 8;
3557 UShort mask = 0x3 << (2 * (a & 7));
3558 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3559 if (LIKELY( fi->tags[lineno] == atag )) {
3560 /* hit. clear the bits. */
3561 UShort u16 = line->u16s[loff];
3562 line->u16s[loff] = u16 & ~mask; /* clear them */
3563 } else {
3564 /* miss. The filter doesn't hold this address, so ignore. */
3565 }
3566}
3567
3568static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3569{
3570 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3571 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3572 FiLine* line = &fi->lines[lineno];
3573 UWord loff = (a - atag) / 8;
3574 if (LIKELY( fi->tags[lineno] == atag )) {
3575 line->u16s[loff] = 0;
3576 } else {
3577 /* miss. The filter doesn't hold this address, so ignore. */
3578 }
3579}
3580
3581static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3582{
3583 //VG_(printf)("%lu ", len);
3584 /* slowly do part preceding 8-alignment */
3585 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3586 Filter__clear_1byte( fi, a );
3587 a++;
3588 len--;
3589 }
3590 /* vector loop */
3591 while (len >= 8) {
3592 Filter__clear_8bytes_aligned( fi, a );
3593 a += 8;
3594 len -= 8;
3595 }
3596 /* slowly do tail */
3597 while (UNLIKELY(len > 0)) {
3598 Filter__clear_1byte( fi, a );
3599 a++;
3600 len--;
3601 }
3602}
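/* A summary of the per-line bit layout, inferred from the masks used
   in the handlers below rather than stated anywhere authoritative:
   each u16 in a FiLine covers 8 bytes, 2 bits per byte.  The upper
   bit of each pair is the R bit (set by reads and writes alike); the
   lower bit is the W bit (set only by writes).  Hence 0xAAAA covers
   all 8 R bits and 0xFFFF all 8 R & W bits.  The sketch below is
   illustrative only and, following this file's convention for
   optional code, is compiled out. */
#if 0
static void example_filter_decompose ( Filter* fi, Addr a )
{
   Addr   atag   = FI_GET_TAG(a);        /* line-aligned tag for 'a' */
   UWord  lineno = FI_GET_LINENO(a);     /* which line in the filter */
   UWord  loff   = (a - atag) / 8;       /* which u16 within the line */
   UShort rd_msk = 0x2 << (2 * (a & 7)); /* the R bit for this byte */
   UShort wr_msk = 0x3 << (2 * (a & 7)); /* the R & W bits for it */
   (void)fi; (void)lineno; (void)loff; (void)rd_msk; (void)wr_msk;
}
#endif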
3603
3604
3605/* ------ Read handlers for the filter. ------ */
3606
3607static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3608{
3609 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3610 return False;
3611 {
3612 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3613 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3614 FiLine* line = &fi->lines[lineno];
3615 UWord loff = (a - atag) / 8;
3616 UShort mask = 0xAAAA;
3617 if (LIKELY( fi->tags[lineno] == atag )) {
3618 /* hit. check line and update. */
3619 UShort u16 = line->u16s[loff];
3620 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3621 line->u16s[loff] = u16 | mask; /* set them */
3622 return ok;
3623 } else {
3624 /* miss. nuke existing line and re-use it. */
3625 UWord i;
3626 fi->tags[lineno] = atag;
3627 for (i = 0; i < FI_LINE_SZB / 8; i++)
3628 line->u16s[i] = 0;
3629 line->u16s[loff] = mask;
3630 return False;
3631 }
3632 }
3633}
3634
3635static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3636{
3637 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3638 return False;
3639 {
3640 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3641 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3642 FiLine* line = &fi->lines[lineno];
3643 UWord loff = (a - atag) / 8;
3644 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3645 if (LIKELY( fi->tags[lineno] == atag )) {
3646 /* hit. check line and update. */
3647 UShort u16 = line->u16s[loff];
3648 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3649 line->u16s[loff] = u16 | mask; /* set them */
3650 return ok;
3651 } else {
3652 /* miss. nuke existing line and re-use it. */
3653 UWord i;
3654 fi->tags[lineno] = atag;
3655 for (i = 0; i < FI_LINE_SZB / 8; i++)
3656 line->u16s[i] = 0;
3657 line->u16s[loff] = mask;
3658 return False;
3659 }
3660 }
3661}
3662
3663static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3664{
3665 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3666 return False;
3667 {
3668 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3669 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3670 FiLine* line = &fi->lines[lineno];
3671 UWord loff = (a - atag) / 8;
3672 UShort mask = 0xA << (2 * (a & 6));
3673 /* mask is A000, 0A00, 00A0 or 000A */
3674 if (LIKELY( fi->tags[lineno] == atag )) {
3675 /* hit. check line and update. */
3676 UShort u16 = line->u16s[loff];
3677 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3678 line->u16s[loff] = u16 | mask; /* set them */
3679 return ok;
3680 } else {
3681 /* miss. nuke existing line and re-use it. */
3682 UWord i;
3683 fi->tags[lineno] = atag;
3684 for (i = 0; i < FI_LINE_SZB / 8; i++)
3685 line->u16s[i] = 0;
3686 line->u16s[loff] = mask;
3687 return False;
3688 }
3689 }
3690}
3691
3692static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3693{
3694 {
3695 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3696 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3697 FiLine* line = &fi->lines[lineno];
3698 UWord loff = (a - atag) / 8;
3699 UShort mask = 0x2 << (2 * (a & 7));
3700 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3701 if (LIKELY( fi->tags[lineno] == atag )) {
3702 /* hit. check line and update. */
3703 UShort u16 = line->u16s[loff];
3704 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3705 line->u16s[loff] = u16 | mask; /* set them */
3706 return ok;
3707 } else {
3708 /* miss. nuke existing line and re-use it. */
3709 UWord i;
3710 fi->tags[lineno] = atag;
3711 for (i = 0; i < FI_LINE_SZB / 8; i++)
3712 line->u16s[i] = 0;
3713 line->u16s[loff] = mask;
3714 return False;
3715 }
3716 }
3717}
3718
3719
3720/* ------ Write handlers for the filter. ------ */
3721
3722static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3723{
3724 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3725 return False;
3726 {
3727 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3728 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3729 FiLine* line = &fi->lines[lineno];
3730 UWord loff = (a - atag) / 8;
3731 UShort mask = 0xFFFF;
3732 if (LIKELY( fi->tags[lineno] == atag )) {
3733 /* hit. check line and update. */
3734 UShort u16 = line->u16s[loff];
3735 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3736 line->u16s[loff] = u16 | mask; /* set them */
3737 return ok;
3738 } else {
3739 /* miss. nuke existing line and re-use it. */
3740 UWord i;
3741 fi->tags[lineno] = atag;
3742 for (i = 0; i < FI_LINE_SZB / 8; i++)
3743 line->u16s[i] = 0;
3744 line->u16s[loff] = mask;
3745 return False;
3746 }
3747 }
3748}
3749
3750static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3751{
3752 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3753 return False;
3754 {
3755 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3756 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3757 FiLine* line = &fi->lines[lineno];
3758 UWord loff = (a - atag) / 8;
3759 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3760 if (LIKELY( fi->tags[lineno] == atag )) {
3761 /* hit. check line and update. */
3762 UShort u16 = line->u16s[loff];
3763 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3764 line->u16s[loff] = u16 | mask; /* set them */
3765 return ok;
3766 } else {
3767 /* miss. nuke existing line and re-use it. */
3768 UWord i;
3769 fi->tags[lineno] = atag;
3770 for (i = 0; i < FI_LINE_SZB / 8; i++)
3771 line->u16s[i] = 0;
3772 line->u16s[loff] = mask;
3773 return False;
3774 }
3775 }
3776}
3777
3778static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3779{
3780 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3781 return False;
3782 {
3783 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3784 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3785 FiLine* line = &fi->lines[lineno];
3786 UWord loff = (a - atag) / 8;
3787 UShort mask = 0xF << (2 * (a & 6));
3788 /* mask is F000, 0F00, 00F0 or 000F */
3789 if (LIKELY( fi->tags[lineno] == atag )) {
3790 /* hit. check line and update. */
3791 UShort u16 = line->u16s[loff];
3792 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3793 line->u16s[loff] = u16 | mask; /* set them */
3794 return ok;
3795 } else {
3796 /* miss. nuke existing line and re-use it. */
3797 UWord i;
3798 fi->tags[lineno] = atag;
3799 for (i = 0; i < FI_LINE_SZB / 8; i++)
3800 line->u16s[i] = 0;
3801 line->u16s[loff] = mask;
3802 return False;
3803 }
3804 }
3805}
3806
3807static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3808{
3809 {
3810 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3811 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3812 FiLine* line = &fi->lines[lineno];
3813 UWord loff = (a - atag) / 8;
3814 UShort mask = 0x3 << (2 * (a & 7));
3815 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3816 if (LIKELY( fi->tags[lineno] == atag )) {
3817 /* hit. check line and update. */
3818 UShort u16 = line->u16s[loff];
3819         Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3820 line->u16s[loff] = u16 | mask; /* set them */
3821 return ok;
3822 } else {
3823 /* miss. nuke existing line and re-use it. */
3824 UWord i;
3825 fi->tags[lineno] = atag;
3826 for (i = 0; i < FI_LINE_SZB / 8; i++)
3827 line->u16s[i] = 0;
3828 line->u16s[loff] = mask;
3829 return False;
3830 }
3831 }
3832}
3833
sewardjf98e1c02008-10-25 16:22:41 +00003834
3835/////////////////////////////////////////////////////////
3836// //
3837// Threads //
3838// //
3839/////////////////////////////////////////////////////////
3840
sewardje4cce742011-02-24 15:25:24 +00003841/* Maps ThrID values to their Thr*s (which contain ThrID values that
3842   should point back to the relevant slot in the array).  Lowest
3843 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3844static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3845
3846/* And a counter to dole out ThrID values. For rationale/background,
3847 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003848static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003849
3850static ThrID Thr__to_ThrID ( Thr* thr ) {
3851 return thr->thrid;
3852}
3853static Thr* Thr__from_ThrID ( UInt thrid ) {
3854 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3855 tl_assert(thr->thrid == thrid);
3856 return thr;
3857}
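/* So, for example, thrid 1024 lives in slot 0 of thrid_to_thr_map,
   1025 in slot 1, and so on. */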
3858
3859static Thr* Thr__new ( void )
3860{
sewardjf98e1c02008-10-25 16:22:41 +00003861 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3862 thr->viR = VtsID_INVALID;
3863 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003864 thr->llexit_done = False;
3865 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00003866 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
philippeca903bb2014-04-26 22:50:08 +00003867 if (HG_(clo_history_level) == 1)
3868 thr->local_Kws_n_stacks
3869 = VG_(newXA)( HG_(zalloc),
3870 "libhb.Thr__new.3 (local_Kws_and_stacks)",
3871 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00003872
3873 /* Add this Thr* <-> ThrID binding to the mapping, and
3874 cross-check */
3875 if (!thrid_to_thr_map) {
3876 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3877 HG_(free), sizeof(Thr*) );
sewardje4cce742011-02-24 15:25:24 +00003878 }
3879
sewardj7aa38a92011-02-27 23:04:12 +00003880 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00003881 /* We're hosed. We have to stop. */
3882 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3883 }
3884
3885 thr->thrid = thrid_counter++;
3886 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3887 tl_assert(ix + 1024 == thr->thrid);
3888
sewardjf98e1c02008-10-25 16:22:41 +00003889 return thr;
3890}
3891
sewardj8ab2c132009-08-02 09:34:35 +00003892static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00003893{
3894 Word nPresent;
3895 ULong_n_EC pair;
3896 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00003897
3898 // We only collect this info at history level 1 (approx)
3899 if (HG_(clo_history_level) != 1)
3900 return;
3901
sewardj8ab2c132009-08-02 09:34:35 +00003902 /* This is the scalar Kw for thr. */
3903 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00003904 pair.ec = main_get_EC( thr );
3905 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00003906 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00003907
3908 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00003909 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00003910
3911 /* Throw away old stacks, if necessary. We can't accumulate stuff
3912 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00003913 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3914 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3915 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3916 if (0)
3917 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00003918 thr, pair.ull, pair.ec );
3919 }
3920
3921 if (nPresent > 0) {
3922 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00003923 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3924 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00003925 }
3926
3927 if (nPresent == 0)
3928 pair.ec = NULL;
3929
sewardj8ab2c132009-08-02 09:34:35 +00003930 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00003931
3932 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00003933 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00003934 thr, pair.ull, pair.ec );
3935 if (0)
3936 VG_(pp_ExeContext)(pair.ec);
3937}
3938
florian6bd9dc12012-11-23 16:17:43 +00003939static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
3940 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00003941{
3942 if (pair1->ull < pair2->ull) return -1;
3943 if (pair1->ull > pair2->ull) return 1;
3944 return 0;
3945}
3946
sewardjf98e1c02008-10-25 16:22:41 +00003947
3948/////////////////////////////////////////////////////////
3949// //
3950// Shadow Values //
3951// //
3952/////////////////////////////////////////////////////////
3953
3954// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3955// hb_zsm.h. We have to do everything else here.
3956
3957/* SVal is 64 bit unsigned int.
3958
3959 <---------30---------> <---------30--------->
3960 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003961 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003962 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
3963
sewardjf98e1c02008-10-25 16:22:41 +00003964*/
3965#define SVAL_TAGMASK (3ULL << 62)
3966
3967static inline Bool SVal__isC ( SVal s ) {
3968 return (0ULL << 62) == (s & SVAL_TAGMASK);
3969}
3970static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3971 //tl_assert(VtsID__is_valid(rmini));
3972 //tl_assert(VtsID__is_valid(wmini));
3973 return (((ULong)rmini) << 32) | ((ULong)wmini);
3974}
3975static inline VtsID SVal__unC_Rmin ( SVal s ) {
3976 tl_assert(SVal__isC(s));
3977 return (VtsID)(s >> 32);
3978}
3979static inline VtsID SVal__unC_Wmin ( SVal s ) {
3980 tl_assert(SVal__isC(s));
3981 return (VtsID)(s & 0xFFFFFFFFULL);
3982}
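/* Worked example: SVal__mkC(0x123, 0x456) == 0x0000012300000456ULL.
   The top two bits are zero, so SVal__isC holds, and SVal__unC_Rmin
   and SVal__unC_Wmin recover 0x123 and 0x456 respectively.  Since
   VtsIDs fit in 30 bits, a C value can never collide with the A or
   INVALID tags. */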
3983
sewardj23f12002009-07-24 08:45:08 +00003984static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003985 return (2ULL << 62) == (s & SVAL_TAGMASK);
3986}
sewardj5aa09bf2014-06-20 14:25:53 +00003987__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00003988static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003989 return 2ULL << 62;
3990}
3991
3992/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00003993static inline void SVal__rcinc ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003994 if (SVal__isC(s)) {
3995 VtsID__rcinc( SVal__unC_Rmin(s) );
3996 VtsID__rcinc( SVal__unC_Wmin(s) );
3997 }
3998}
3999
4000/* Direct callback from lib_zsm. */
philippe1475a7f2015-05-11 19:45:08 +00004001static inline void SVal__rcdec ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00004002 if (SVal__isC(s)) {
4003 VtsID__rcdec( SVal__unC_Rmin(s) );
4004 VtsID__rcdec( SVal__unC_Wmin(s) );
4005 }
4006}
4007
4008
4009/////////////////////////////////////////////////////////
4010// //
4011// Change-event map2 //
4012// //
4013/////////////////////////////////////////////////////////
4014
sewardjf98e1c02008-10-25 16:22:41 +00004015/* This is in two parts:
4016
sewardj23f12002009-07-24 08:45:08 +00004017 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00004018 traces. When the reference count of a stack trace becomes zero,
4019 it is removed from the set and freed up. The intent is to have
4020 a set of stack traces which can be referred to from (2), but to
4021 only represent each one once. The set is indexed/searched by
4022 ordering on the stack trace vectors.
4023
sewardj849b0ed2008-12-21 10:43:10 +00004024 2. A SparseWA of OldRefs. These store information about each old
4025 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00004026 location for which the information is recorded. For LRU
philippecabdbb52015-04-20 21:33:16 +00004027 purposes, each OldRef in the SparseWA is also on a doubly
4028      linked list maintaining the order in which the OldRefs were most
4029 recently accessed.
sewardjf98e1c02008-10-25 16:22:41 +00004030
4031 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00004032 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
4033 size) triples to RCECs. This allows us to collect the last
4034 access-traceback by up to N_OLDREF_ACCS different triples for
4035 this location. The accs[] array is a MTF-array. If a binding
4036 falls off the end, that's too bad -- we will lose info about
4037 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00004038
philippecabdbb52015-04-20 21:33:16 +00004039   We allocate a maximum of VG_(clo_conflict_cache_size) OldRefs.
4040 Then we do exact LRU discarding. For each discarded OldRef we must
sewardjf98e1c02008-10-25 16:22:41 +00004041 of course decrement the reference count on the all RCECs it
4042 refers to, in order that entries from (1) eventually get
4043 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00004044
4045 A major improvement in reliability of this mechanism would be to
4046 have a dynamically sized OldRef.accs[] array, so no entries ever
4047 fall off the end. In investigations (Dec 08) it appears that a
4048 major cause for the non-availability of conflicting-access traces
4049 in race reports is caused by the fixed size of this array. I
4050 suspect for most OldRefs, only a few entries are used, but for a
4051 minority of cases there is an overflow, leading to info lossage.
4052 Investigations also suggest this is very workload and scheduling
4053 sensitive. Therefore a dynamic sizing would be better.
4054
philippe6643e962012-01-17 21:16:30 +00004055 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00004056 for OldRef structures. And that's important for performance. So
4057 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00004058*/
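/* Pictorially, the arrangement just described:

      guest address 'ga'
          |
          v  (SparseWA lookup)
      OldRef { accs[N_OLDREF_ACCS] }   (also chained on the LRU list)
          |
          v  (per slot: thrid, R/W, size)
      RCEC { rc, frames[] }            (shared, reference-counted)
*/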
4059
4060
4061static UWord stats__ctxt_rcdec1 = 0;
4062static UWord stats__ctxt_rcdec2 = 0;
4063static UWord stats__ctxt_rcdec3 = 0;
4064static UWord stats__ctxt_rcdec_calls = 0;
4065static UWord stats__ctxt_rcdec_discards = 0;
4066static UWord stats__ctxt_rcdec1_eq = 0;
4067
4068static UWord stats__ctxt_tab_curr = 0;
4069static UWord stats__ctxt_tab_max = 0;
4070
4071static UWord stats__ctxt_tab_qs = 0;
4072static UWord stats__ctxt_tab_cmps = 0;
4073
4074
4075///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00004076//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00004077///
4078
4079#define N_FRAMES 8
4080
4081// (UInt) `echo "Reference Counted Execution Context" | md5sum`
4082#define RCEC_MAGIC 0xab88abb2UL
4083
4084//#define N_RCEC_TAB 98317 /* prime */
4085#define N_RCEC_TAB 196613 /* prime */
4086
4087typedef
4088 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00004089 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004090 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004091 UWord rc;
4092 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004093 UWord frames_hash; /* hash of all the frames */
4094 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004095 }
4096 RCEC;
4097
philippecabdbb52015-04-20 21:33:16 +00004098//////////// BEGIN RCEC pool allocator
4099static PoolAlloc* rcec_pool_allocator;
4100static RCEC* alloc_RCEC ( void ) {
4101 return VG_(allocEltPA) ( rcec_pool_allocator );
4102}
4103
4104static void free_RCEC ( RCEC* rcec ) {
4105 tl_assert(rcec->magic == RCEC_MAGIC);
4106 VG_(freeEltPA)( rcec_pool_allocator, rcec );
4107}
4108//////////// END RCEC pool allocator
4109
sewardjf98e1c02008-10-25 16:22:41 +00004110static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4111
philippecabdbb52015-04-20 21:33:16 +00004112/* Count of allocated RCEC having ref count > 0 */
4113static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004114
4115/* Gives an arbitrary total order on RCEC .frames fields */
4116static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4117 Word i;
4118 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4119 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004120 if (ec1->frames_hash < ec2->frames_hash) return -1;
4121 if (ec1->frames_hash > ec2->frames_hash) return 1;
4122 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004123 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004124 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004125 }
4126 return 0;
4127}
4128
4129
4130/* Dec the ref of this RCEC. */
4131static void ctxt__rcdec ( RCEC* ec )
4132{
4133 stats__ctxt_rcdec_calls++;
4134 tl_assert(ec && ec->magic == RCEC_MAGIC);
4135 tl_assert(ec->rc > 0);
4136 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00004137 if (ec->rc == 0)
4138 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00004139}
4140
4141static void ctxt__rcinc ( RCEC* ec )
4142{
4143 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00004144 if (ec->rc == 0)
4145 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00004146 ec->rc++;
4147}
4148
4149
4150/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4151   move it one step closer to the front of the list, so as to make
4152 subsequent searches for it cheaper. */
4153static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4154{
4155 RCEC *ec0, *ec1, *ec2;
4156 if (ec == *headp)
4157 tl_assert(0); /* already at head of list */
4158 tl_assert(ec != NULL);
4159 ec0 = *headp;
4160 ec1 = NULL;
4161 ec2 = NULL;
4162 while (True) {
4163 if (ec0 == NULL || ec0 == ec) break;
4164 ec2 = ec1;
4165 ec1 = ec0;
4166 ec0 = ec0->next;
4167 }
4168 tl_assert(ec0 == ec);
4169 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4170 RCEC* tmp;
4171 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4172 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4173 closer to the start of the list. */
4174 tl_assert(ec2->next == ec1);
4175 tl_assert(ec1->next == ec0);
4176 tmp = ec0->next;
4177 ec2->next = ec0;
4178 ec0->next = ec1;
4179 ec1->next = tmp;
4180 }
4181 else
4182 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4183 /* it's second in the list. */
4184 tl_assert(*headp == ec1);
4185 tl_assert(ec1->next == ec0);
4186 ec1->next = ec0->next;
4187 ec0->next = ec1;
4188 *headp = ec0;
4189 }
4190}
4191
4192
4193/* Find the given RCEC in the hash table, and return a pointer to it.
4194   Or, if not present, add the given one to the table (by making a copy
4195   of it, so the caller can immediately deallocate the original) and
4196   return a pointer to the copy.  The caller can safely have 'example'
4197   on its stack, since we will always return a pointer to a copy of
4198   it, not to the original.  Note that the inserted node will have .rc
4199   of zero and so the caller must immediately increment it. */
4200__attribute__((noinline))
4201static RCEC* ctxt__find_or_add ( RCEC* example )
4202{
4203 UWord hent;
4204 RCEC* copy;
4205 tl_assert(example && example->magic == RCEC_MAGIC);
4206 tl_assert(example->rc == 0);
4207
4208 /* Search the hash table to see if we already have it. */
4209 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004210 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004211 copy = contextTab[hent];
4212 while (1) {
4213 if (!copy) break;
4214 tl_assert(copy->magic == RCEC_MAGIC);
4215 stats__ctxt_tab_cmps++;
4216 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4217 copy = copy->next;
4218 }
4219
4220 if (copy) {
4221 tl_assert(copy != example);
4222 /* optimisation: if it's not at the head of its list, move 1
4223 step fwds, to make future searches cheaper */
4224 if (copy != contextTab[hent]) {
4225 move_RCEC_one_step_forward( &contextTab[hent], copy );
4226 }
4227 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004228 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004229 tl_assert(copy != example);
4230 *copy = *example;
4231 copy->next = contextTab[hent];
4232 contextTab[hent] = copy;
4233 stats__ctxt_tab_curr++;
4234 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4235 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4236 }
4237 return copy;
4238}
4239
4240static inline UWord ROLW ( UWord w, Int n )
4241{
4242 Int bpw = 8 * sizeof(UWord);
4243 w = (w << n) | (w >> (bpw-n));
4244 return w;
4245}
4246
4247__attribute__((noinline))
4248static RCEC* get_RCEC ( Thr* thr )
4249{
4250 UWord hash, i;
4251 RCEC example;
4252 example.magic = RCEC_MAGIC;
4253 example.rc = 0;
4254 example.rcX = 0;
florian195623b2013-01-22 00:25:05 +00004255 example.next = NULL;
njn6c83d5e2009-05-05 23:46:24 +00004256 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004257 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004258 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004259 hash ^= example.frames[i];
4260 hash = ROLW(hash, 19);
4261 }
njn6c83d5e2009-05-05 23:46:24 +00004262 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004263 return ctxt__find_or_add( &example );
4264}
4265
4266///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004267//// Part (2):
4268/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004269///
4270
sewardjffce8152011-06-24 10:09:41 +00004271/* Records an access: a thread, a context (size & writeness) and the
4272   set of W-held locks.  The size (1,2,4,8) is encoded as 00 = 1, 01 =
4273 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004274*/
sewardjffce8152011-06-24 10:09:41 +00004275typedef
4276 struct {
4277 RCEC* rcec;
4278 WordSetID locksHeldW;
4279 UInt thrid : SCALARTS_N_THRBITS;
4280 UInt szLg2B : 2;
4281 UInt isW : 1;
4282 }
4283 Thr_n_RCEC;
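/* For example, an 8-byte write by thread t is stored as thrid ==
   t->thrid, szLg2B == 3, isW == 1; the byte size is recovered as
   1 << szLg2B. */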
sewardjf98e1c02008-10-25 16:22:41 +00004284
sewardj849b0ed2008-12-21 10:43:10 +00004285#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004286
4287typedef
philippecabdbb52015-04-20 21:33:16 +00004288 struct OldRef {
4289 struct OldRef *prev; // to refs older than this one
4290 struct OldRef *next; // to refs newer that this one
4291 Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
sewardjffce8152011-06-24 10:09:41 +00004292 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004293 Thr_n_RCEC accs[N_OLDREF_ACCS];
4294 }
4295 OldRef;
philippecabdbb52015-04-20 21:33:16 +00004296/* We need ga in OldRef in order to remove OldRef from the sparsewa
4297 by key (i.e. ga) when re-using the lru OldRef. */
sewardjd86e3a22008-12-03 11:39:37 +00004298
philippe6643e962012-01-17 21:16:30 +00004299//////////// BEGIN OldRef pool allocator
4300static PoolAlloc* oldref_pool_allocator;
philippecabdbb52015-04-20 21:33:16 +00004301// Note: We only allocate elements in this pool allocator; we never free them.
4302// We stop allocating elements at VG_(clo_conflict_cache_size).
philippe6643e962012-01-17 21:16:30 +00004303//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004304
philippecabdbb52015-04-20 21:33:16 +00004305static OldRef mru;
4306static OldRef lru;
4307// A doubly linked list, chaining all OldRefs in mru/lru order.
4308// mru/lru are sentinel nodes.
4309// Whenever an oldref is re-used, its position is changed as the most recently
4310// used (i.e. pointed to by mru.prev).
4311// When a new oldref is needed, it is allocated from the pool
4312// if we have not yet reached --conflict-cache-size.
4313// Otherwise, if all oldref have already been allocated,
4314// the least recently used (i.e. pointed to by lru.next) is re-used.
4315// When an OldRef is used, it is moved as the most recently used entry
4316// (i.e. pointed to by mru.prev).
4317
4318// Removes r from the double linked list
4319// Note: we do not need to test for special cases such as
4320// NULL next or prev pointers, because we have sentinel nodes
4321// at both sides of the list. So, a node is always forward and
4322// backward linked.
4323static inline void OldRef_unchain(OldRef *r)
4324{
4325 r->next->prev = r->prev;
4326 r->prev->next = r->next;
4327}
4328
4329// Insert new as the newest OldRef
4330// Similarly to OldRef_unchain, no need to test for NULL
4331// pointers, as e.g. mru.prev is always guaranteed to point
4332// to a non NULL node (lru when the list is empty).
4333static inline void OldRef_newest(OldRef *new)
4334{
4335 new->next = &mru;
4336 new->prev = mru.prev;
4337 mru.prev = new;
4338 new->prev->next = new;
4339}
sewardjd86e3a22008-12-03 11:39:37 +00004340
sewardjbc307e52008-12-06 22:10:54 +00004341static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
sewardjbc307e52008-12-06 22:10:54 +00004342static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
philippecabdbb52015-04-20 21:33:16 +00004343/* Note: the nr of refs in the oldrefTree will always be equal to
4344   the nr of elements that were allocated from the OldRef pool
4345   allocator, as we never free an OldRef: we just re-use them. */
4346
4347
4348/* Allocates a new OldRef, or re-uses the lru one if all allowed
4349   OldRefs have already been allocated. */
4350static OldRef* alloc_or_reuse_OldRef ( void )
4351{
4352 if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
4353 oldrefTreeN++;
4354 return VG_(allocEltPA) ( oldref_pool_allocator );
4355 } else {
4356 Bool b;
4357 UWord valW;
4358 OldRef *oldref = lru.next;
4359
4360 OldRef_unchain(oldref);
4361 b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
4362 tl_assert(b);
4363 tl_assert (oldref == (OldRef*)valW);
4364
4365 for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
4366 ThrID aThrID = oldref->accs[i].thrid;
4367 RCEC* aRef = oldref->accs[i].rcec;
4368 if (aRef) {
4369 tl_assert(aThrID != 0);
4370 stats__ctxt_rcdec3++;
4371 ctxt__rcdec( aRef );
4372 } else {
4373 tl_assert(aThrID == 0);
4374 }
4375 }
4376 return oldref;
4377 }
4378}
4379
sewardjf98e1c02008-10-25 16:22:41 +00004380
sewardj1669cc72008-12-13 01:20:21 +00004381inline static UInt min_UInt ( UInt a, UInt b ) {
4382 return a < b ? a : b;
4383}
4384
sewardja781be62008-12-08 00:12:28 +00004385/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4386 first interval is lower, 1 if the first interval is higher, and 0
4387 if there is any overlap. Redundant paranoia with casting is there
4388 following what looked distinctly like a bug in gcc-4.1.2, in which
4389 some of the comparisons were done signedly instead of
4390 unsignedly. */
4391/* Copied from exp-ptrcheck/sg_main.c */
4392static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4393 Addr a2, SizeT n2 ) {
4394 UWord a1w = (UWord)a1;
4395 UWord n1w = (UWord)n1;
4396 UWord a2w = (UWord)a2;
4397 UWord n2w = (UWord)n2;
4398 tl_assert(n1w > 0 && n2w > 0);
4399 if (a1w + n1w <= a2w) return -1L;
4400 if (a2w + n2w <= a1w) return 1L;
4401 return 0;
4402}
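/* For example: [0x1000,+4) vs [0x1004,+4) gives -1 (first interval
   entirely lower); swapping the arguments gives 1; and [0x1000,+8)
   vs [0x1004,+4) gives 0, since the intervals overlap. */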
4403
sewardjc5ea9962008-12-07 01:41:46 +00004404static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004405{
sewardjd86e3a22008-12-03 11:39:37 +00004406 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004407 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004408 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004409 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004410 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004411
sewardjffce8152011-06-24 10:09:41 +00004412 tl_assert(thr);
4413 ThrID thrid = thr->thrid;
4414 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4415
4416 WordSetID locksHeldW = thr->hgthread->locksetW;
4417
sewardjc5ea9962008-12-07 01:41:46 +00004418 rcec = get_RCEC( thr );
4419 ctxt__rcinc(rcec);
4420
sewardjffce8152011-06-24 10:09:41 +00004421 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004422 switch (szB) {
4423 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004424 case 1: szLg2B = 0; break;
4425 case 2: szLg2B = 1; break;
4426 case 4: szLg2B = 2; break;
4427 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004428 default: tl_assert(0);
4429 }
4430
sewardjffce8152011-06-24 10:09:41 +00004431 /* Look in the map to see if we already have a record for this
4432 address. */
philippe40648e22015-04-11 11:42:22 +00004433 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004434
sewardjd86e3a22008-12-03 11:39:37 +00004435 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004436
4437 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004438 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004439 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004440 ref = (OldRef*)valW;
philippecabdbb52015-04-20 21:33:16 +00004441
4442 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004443
sewardjf98e1c02008-10-25 16:22:41 +00004444 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004445 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004446 continue;
sewardjffce8152011-06-24 10:09:41 +00004447 if (ref->accs[i].szLg2B != szLg2B)
4448 continue;
4449 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004450 continue;
4451 /* else we have a match, so stop looking. */
4452 break;
sewardjf98e1c02008-10-25 16:22:41 +00004453 }
4454
4455 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004456 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004457 if (i > 0) {
4458 Thr_n_RCEC tmp = ref->accs[i-1];
4459 ref->accs[i-1] = ref->accs[i];
4460 ref->accs[i] = tmp;
4461 i--;
4462 }
sewardjc5ea9962008-12-07 01:41:46 +00004463 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004464 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004465 ctxt__rcdec( ref->accs[i].rcec );
4466 tl_assert(ref->accs[i].thrid == thrid);
4467 /* Update the RCEC and the W-held lockset. */
4468 ref->accs[i].rcec = rcec;
4469 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004470 } else {
sewardjffce8152011-06-24 10:09:41 +00004471 /* No matching entry for this (thrid, R/W, size) triple.
 4472 Shuffle all entries down one slot, and put the new
 4473 (thrid, R/W, size, locksHeldW) entry at the start of the array. */
4474 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004475 /* the last slot is in use. We must dec the rc on the
4476 associated rcec. */
4477 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4478 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004479 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4480 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004481 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004482 } else {
4483 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4484 }
4485 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4486 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004487 ref->accs[0].thrid = thrid;
4488 ref->accs[0].szLg2B = szLg2B;
4489 ref->accs[0].isW = (UInt)(isW & 1);
4490 ref->accs[0].locksHeldW = locksHeldW;
4491 ref->accs[0].rcec = rcec;
4492 /* thrid==0 is used to signify an empty slot, so we can't
4493 add zero thrid (such a ThrID is invalid anyway). */
4494 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004495 }
4496
philippecabdbb52015-04-20 21:33:16 +00004497 OldRef_unchain(ref);
4498 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004499
4500 } else {
4501
4502 /* We don't have a record for this address. Create a new one. */
philippecabdbb52015-04-20 21:33:16 +00004503 ref = alloc_or_reuse_OldRef();
4504 ref->ga = a;
sewardjffce8152011-06-24 10:09:41 +00004505 ref->accs[0].thrid = thrid;
4506 ref->accs[0].szLg2B = szLg2B;
4507 ref->accs[0].isW = (UInt)(isW & 1);
4508 ref->accs[0].locksHeldW = locksHeldW;
4509 ref->accs[0].rcec = rcec;
4510
4511 /* thrid==0 is used to signify an empty slot, so we can't
4512 add zero thrid (such a ThrID is invalid anyway). */
4513 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4514
4515 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004516 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004517 ref->accs[j].rcec = NULL;
4518 ref->accs[j].thrid = 0;
4519 ref->accs[j].szLg2B = 0;
4520 ref->accs[j].isW = 0;
4521 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004522 }
sewardjbc307e52008-12-06 22:10:54 +00004523 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
philippecabdbb52015-04-20 21:33:16 +00004524 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004525 }
4526}
4527
4528
sewardjffce8152011-06-24 10:09:41 +00004529/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004530Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004531 /*OUT*/Thr** resThr,
4532 /*OUT*/SizeT* resSzB,
4533 /*OUT*/Bool* resIsW,
4534 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004535 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004536{
sewardja781be62008-12-08 00:12:28 +00004537 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004538 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004539 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004540 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004541
sewardjffce8152011-06-24 10:09:41 +00004542 ThrID cand_thrid;
4543 RCEC* cand_rcec;
4544 Bool cand_isW;
4545 SizeT cand_szB;
4546 WordSetID cand_locksHeldW;
4547 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004548
4549 Addr toCheck[15];
4550 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004551
4552 tl_assert(thr);
4553 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004554
sewardjffce8152011-06-24 10:09:41 +00004555 ThrID thrid = thr->thrid;
4556
sewardja781be62008-12-08 00:12:28 +00004557 toCheck[nToCheck++] = a;
4558 for (i = -7; i < (Word)szB; i++) {
4559 if (i != 0)
4560 toCheck[nToCheck++] = a + i;
4561 }
4562 tl_assert(nToCheck <= 15);
4563
4564 /* Now see if we can find a suitable matching event for
4565 any of the addresses in toCheck[0 .. nToCheck-1]. */
4566 for (j = 0; j < nToCheck; j++) {
4567
4568 cand_a = toCheck[j];
4569 // VG_(printf)("test %ld %p\n", j, cand_a);
4570
philippe40648e22015-04-11 11:42:22 +00004571 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004572 if (!b)
4573 continue;
4574
sewardjd86e3a22008-12-03 11:39:37 +00004575 ref = (OldRef*)valW;
sewardjffce8152011-06-24 10:09:41 +00004576 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004577
sewardjffce8152011-06-24 10:09:41 +00004578 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4579 cand_rcec = NULL;
4580 cand_isW = False;
4581 cand_szB = 0;
4582 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004583
sewardjc5ea9962008-12-07 01:41:46 +00004584 for (i = 0; i < N_OLDREF_ACCS; i++) {
4585 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004586 cand_rcec = cand->rcec;
4587 cand_thrid = cand->thrid;
4588 cand_isW = (Bool)cand->isW;
4589 cand_szB = 1 << cand->szLg2B;
4590 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004591
sewardjffce8152011-06-24 10:09:41 +00004592 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004593 /* This slot isn't in use. Ignore it. */
4594 continue;
4595
sewardjffce8152011-06-24 10:09:41 +00004596 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004597 /* This is an access by the same thread, but we're only
4598 interested in accesses from other threads. Ignore. */
4599 continue;
4600
4601 if ((!cand_isW) && (!isW))
4602 /* We don't want to report a read racing against another
4603 read; that's stupid. So in this case move on. */
4604 continue;
4605
sewardja781be62008-12-08 00:12:28 +00004606 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4607 /* No overlap with the access we're asking about. Ignore. */
4608 continue;
4609
sewardjc5ea9962008-12-07 01:41:46 +00004610 /* We have a match. Stop searching. */
4611 break;
4612 }
4613
4614 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4615
sewardja781be62008-12-08 00:12:28 +00004616 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004617 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004618 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004619 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004620 tl_assert(cand_rcec);
4621 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4622 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004623 /* Count how many non-zero frames we have. */
4624 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4625 for (n = 0; n < maxNFrames; n++) {
4626 if (0 == cand_rcec->frames[n]) break;
4627 }
sewardjffce8152011-06-24 10:09:41 +00004628 *resEC = VG_(make_ExeContext_from_StackTrace)
4629 (cand_rcec->frames, n);
4630 *resThr = Thr__from_ThrID(cand_thrid);
4631 *resSzB = cand_szB;
4632 *resIsW = cand_isW;
4633 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004634 return True;
4635 }
sewardjc5ea9962008-12-07 01:41:46 +00004636
sewardja781be62008-12-08 00:12:28 +00004637 /* consider next address in toCheck[] */
4638 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004639
sewardja781be62008-12-08 00:12:28 +00004640 /* really didn't find anything. */
4641 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004642}
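/* The toCheck[] window above deserves a note: for an access of size
   szB at address A, the loop probes A itself plus A-7 .. A+szB-1
   (skipping A). The 7-byte window below A suffices because the
   largest access size recorded is 8: an old 8-byte access at A-7
   spans [A-7, A+1) and so still overlaps [A, A+szB), whereas one at
   A-8 cannot. A sketch of the implied probe count (illustrative
   only, guarded out of compilation): */
#if 0
static Int n_probes_for ( SizeT szB )  /* szB in {1,2,4,8} */
{
   /* A itself, plus A-7 .. A-1, plus A+1 .. A+szB-1 */
   return 1 + 7 + (Int)szB - 1;        /* == 7 + szB, at most 15 */
}
#endif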
4643
4644static void event_map_init ( void )
4645{
4646 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004647
philippe6643e962012-01-17 21:16:30 +00004648 /* Context (RCEC) pool allocator */
4649 rcec_pool_allocator = VG_(newPA) (
4650 sizeof(RCEC),
4651 1000 /* RCECs per pool */,
4652 HG_(zalloc),
4653 "libhb.event_map_init.1 (RCEC pools)",
4654 HG_(free)
4655 );
sewardjd86e3a22008-12-03 11:39:37 +00004656
4657 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004658 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004659 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004660 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004661 for (i = 0; i < N_RCEC_TAB; i++)
4662 contextTab[i] = NULL;
4663
philippe6643e962012-01-17 21:16:30 +00004664 /* Oldref pool allocator */
4665 oldref_pool_allocator = VG_(newPA)(
4666 sizeof(OldRef),
4667 1000 /* OldRefs per pool */,
4668 HG_(zalloc),
4669 "libhb.event_map_init.3 (OldRef pools)",
4670 HG_(free)
4671 );
sewardjd86e3a22008-12-03 11:39:37 +00004672
sewardjd86e3a22008-12-03 11:39:37 +00004673 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004674 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004675 oldrefTree = VG_(newSWA)(
4676 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004677 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004678 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004679 );
sewardjf98e1c02008-10-25 16:22:41 +00004680
sewardjf98e1c02008-10-25 16:22:41 +00004681 oldrefTreeN = 0;
philippecabdbb52015-04-20 21:33:16 +00004682 mru.prev = &lru;
4683 mru.next = NULL;
4684 lru.prev = NULL;
4685 lru.next = &mru;
4686 for (i = 0; i < N_OLDREF_ACCS; i++) {
4687 mru.accs[i] = (Thr_n_RCEC) {.rcec = NULL,
4688 .locksHeldW = 0,
4689 .thrid = 0,
4690 .szLg2B = 0,
4691 .isW = 0};
4692 lru.accs[i] = mru.accs[i];
4693 }
sewardjf98e1c02008-10-25 16:22:41 +00004694}
4695
philippecabdbb52015-04-20 21:33:16 +00004696static void event_map__check_reference_counts ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004697{
4698 RCEC* rcec;
4699 OldRef* oldref;
4700 Word i;
4701 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004702 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004703
4704 /* Set the 'check' reference counts to zero. Also, optionally
4705 check that the real reference counts are non-zero. We allow
4706 these to fall to zero before a GC, but the GC must get rid of
4707 all those that are zero, hence none should be zero after a
4708 GC. */
4709 for (i = 0; i < N_RCEC_TAB; i++) {
4710 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4711 nEnts++;
4712 tl_assert(rcec);
4713 tl_assert(rcec->magic == RCEC_MAGIC);
sewardjf98e1c02008-10-25 16:22:41 +00004714 rcec->rcX = 0;
4715 }
4716 }
4717
4718 /* check that the stats are sane */
4719 tl_assert(nEnts == stats__ctxt_tab_curr);
4720 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4721
4722 /* visit all the referencing points, incrementing the check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004723 VG_(initIterSWA)( oldrefTree );
4724 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004725 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004726 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004727 ThrID aThrID = oldref->accs[i].thrid;
4728 RCEC* aRef = oldref->accs[i].rcec;
4729 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004730 tl_assert(aRef);
4731 tl_assert(aRef->magic == RCEC_MAGIC);
4732 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004733 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004734 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004735 }
4736 }
4737 }
4738
4739 /* compare check ref counts with actual */
4740 for (i = 0; i < N_RCEC_TAB; i++) {
4741 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4742 tl_assert(rcec->rc == rcec->rcX);
4743 }
4744 }
4745}
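/* The check above is a standard two-pass shadow recount: zero a
   shadow counter (rcX) on every object, re-derive it by walking
   every referencing structure, then compare with the live counter
   (rc). A stand-alone sketch of the pattern (illustrative only,
   guarded out of compilation): */
#if 0
typedef struct { UWord rc, rcX; } Obj;

static void audit ( Obj** all, Word nAll,
                    Obj** refs, Word nRefs ) /* every live reference */
{
   Word i;
   for (i = 0; i < nAll;  i++) all[i]->rcX = 0;  /* clear shadows */
   for (i = 0; i < nRefs; i++) refs[i]->rcX++;   /* recount       */
   for (i = 0; i < nAll;  i++)
      tl_assert(all[i]->rc == all[i]->rcX);      /* must agree    */
}
#endif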
4746
sewardj8fd92d32008-11-20 23:17:01 +00004747__attribute__((noinline))
philippecabdbb52015-04-20 21:33:16 +00004748static void do_RCEC_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004749{
philippecabdbb52015-04-20 21:33:16 +00004750 UInt i;
sewardjf98e1c02008-10-25 16:22:41 +00004751
philippecabdbb52015-04-20 21:33:16 +00004752 if (VG_(clo_stats)) {
4753 static UInt ctr = 1;
4754 VG_(message)(Vg_DebugMsg,
4755 "libhb: RCEC GC: #%u %lu slots,"
4756 " %lu cur ents(ref'd %lu),"
4757 " %lu max ents\n",
4758 ctr++,
4759 (UWord)N_RCEC_TAB,
4760 stats__ctxt_tab_curr, RCEC_referenced,
4761 stats__ctxt_tab_max );
sewardjf98e1c02008-10-25 16:22:41 +00004762 }
philippecabdbb52015-04-20 21:33:16 +00004763 tl_assert (stats__ctxt_tab_curr > RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004764
4765 /* Throw away all RCECs with zero reference counts */
4766 for (i = 0; i < N_RCEC_TAB; i++) {
4767 RCEC** pp = &contextTab[i];
4768 RCEC* p = *pp;
4769 while (p) {
4770 if (p->rc == 0) {
4771 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004772 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004773 p = *pp;
4774 tl_assert(stats__ctxt_tab_curr > 0);
philippe06bc23a2015-04-17 21:19:43 +00004775 stats__ctxt_rcdec_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004776 stats__ctxt_tab_curr--;
4777 } else {
4778 pp = &p->next;
4779 p = p->next;
4780 }
4781 }
4782 }
4783
philippecabdbb52015-04-20 21:33:16 +00004784 tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
sewardjf98e1c02008-10-25 16:22:41 +00004785}
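/* The sweep above uses the classic pointer-to-pointer idiom: keep a
   pointer to the link being examined, rather than to the node, so
   that unlinking needs no special case for the bucket head. A
   stand-alone sketch (illustrative only, guarded out of
   compilation): */
#if 0
typedef struct _N { struct _N* next; UWord rc; } N;

static void sweep_chain ( N** pp )     /* pp = &bucket_head */
{
   N* p = *pp;
   while (p) {
      if (p->rc == 0) {
         *pp = p->next;   /* unlink p; works even at the head */
         /* free p here */
         p = *pp;
      } else {
         pp = &p->next;   /* advance the link pointer */
         p  = p->next;
      }
   }
}
#endif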
4786
sewardjf98e1c02008-10-25 16:22:41 +00004787/////////////////////////////////////////////////////////
4788// //
4789// Core MSM //
4790// //
4791/////////////////////////////////////////////////////////
4792
sewardj23f12002009-07-24 08:45:08 +00004793/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4794 Nov 08, and again after [...],
4795 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004796
sewardj23f12002009-07-24 08:45:08 +00004797static ULong stats__msmcread = 0;
4798static ULong stats__msmcread_change = 0;
4799static ULong stats__msmcwrite = 0;
4800static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004801
sewardj8ab2c132009-08-02 09:34:35 +00004802/* Some notes on the H1 history mechanism:
4803
4804 Transition rules are:
4805
4806 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4807 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4808
4809 After any access by a thread T to a location L, L's constraint pair
4810 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4811
4812 After a race in which an access by thread T conflicts with some
 4813 previous access by another thread U, at a location whose
 4814 constraint (before processing the later access) is (Cr,Cw),
 4815 Cw[U] is the segment in which the previous access lies.
4816
4817 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4818 are compared so as to find out which thread(s) this access
4819 conflicts with. Once that is established, we also require the
4820 pre-update Cw for the location, so we can index into it for those
4821 threads, to get the scalar clock values for the point at which the
4822 former accesses were made. (In fact we only bother to do any of
4823 this for an arbitrarily chosen one of the conflicting threads.
 4824 That is simpler, it avoids flooding the user with vast amounts of
 4825 mostly useless information, and, since the program is wrong if it
 4826 contains any races at all, we don't really need to show every
 4827 conflicting access pair -- so long as we show none only when none
 4828 exist.)
4829
4830 ---
4831
4832 That requires the auxiliary proof that
4833
4834 (Cr `join` Kw)[T] == Kw[T]
4835
4836 Why should that be true? Because for any thread T, Kw[T] >= the
4837 scalar clock value for T known by any other thread. In other
4838 words, because T's value for its own scalar clock is at least as up
4839 to date as the value for it known by any other thread (that is true
4840 for both the R- and W- scalar clocks). Hence no other thread will
4841 be able to feed in a value for that element (indirectly via a
4842 constraint) which will exceed Kw[T], and hence the join cannot
4843 cause that particular element to advance.
4844*/
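/* A concrete instance of the auxiliary claim, as a sketch with two
   threads T=0 and U=1 and scalar clocks held in plain arrays
   (illustrative only, guarded out of compilation): */
#if 0
static UInt max_UInt ( UInt a, UInt b ) { return a > b ? a : b; }

static void demo_join_pins_own_entry ( void )
{
   UInt Kw[2] = { 5, 2 };   /* T's W-clock; Kw[T] == 5 */
   UInt Cr[2] = { 3, 9 };   /* some constraint; necessarily Cr[T] <= 5,
                               since only T itself advances entry T */
   UInt J [2] = { max_UInt(Cr[0], Kw[0]), max_UInt(Cr[1], Kw[1]) };
   /* J == {5, 9}: entry U advances to 9, but entry T stays pinned
      at Kw[T], so (Cr `join` Kw)[T] == Kw[T] as claimed. */
   tl_assert(J[0] == Kw[0]);
}
#endif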
4845
sewardjf98e1c02008-10-25 16:22:41 +00004846__attribute__((noinline))
4847static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004848 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004849 VtsID Cfailed,
4850 VtsID Kfailed,
4851 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004852{
sewardjc5ea9962008-12-07 01:41:46 +00004853 /* Call here to report a race. We just hand it onwards to
4854 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004855 error is going to be collected, then, at history_level 2, that
4856 queries the conflicting-event map. The alternative would be to
4857 query it right here. But that causes a lot of pointless queries
4858 for errors which will shortly be discarded as duplicates, and
4859 can become a performance overhead; so we defer the query until
4860 we know the error is not a duplicate. */
4861
4862 /* Stacks for the bounds of the (or one of the) conflicting
4863 segment(s). These are only set at history_level 1. */
4864 ExeContext* hist1_seg_start = NULL;
4865 ExeContext* hist1_seg_end = NULL;
4866 Thread* hist1_conf_thr = NULL;
4867
4868 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00004869 tl_assert(acc_thr->hgthread);
4870 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00004871 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4872
4873 if (HG_(clo_history_level) == 1) {
4874 Bool found;
4875 Word firstIx, lastIx;
4876 ULong_n_EC key;
4877
4878 /* At history_level 1, we must round up the relevant stack-pair
4879 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004880 deferring it is complex; we can't (easily) put Kfailed and
4881 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004882 getting tied up in difficulties with VtsID reference
4883 counting. So just do it now. */
4884 Thr* confThr;
4885 ULong confTym = 0;
4886 /* Which thread are we in conflict with? There may be more than
4887 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4888 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004889 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004890 /* This must exist! since if it was NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004891 conflict (semantics of return value of
4892 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4893 called us, just checked exactly this -- that there was in
4894 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004895 tl_assert(confThr);
4896
4897 /* Get the scalar clock value that the conflicting thread
4898 introduced into the constraint. A careful examination of the
4899 base machine rules shows that this must be the same as the
4900 conflicting thread's scalar clock when it created this
4901 constraint. Hence we know the scalar clock of the
4902 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004903 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004904
4905 /* Using this scalar clock, index into the conflicting thread's
4906 collection of stack traces made each time its vector clock
4907 (hence its scalar clock) changed. This gives the stack
4908 traces at the start and end of the conflicting segment (well,
4909 as per comment just above, of one of the conflicting
4910 segments, if there are more than one). */
4911 key.ull = confTym;
4912 key.ec = NULL;
4913 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004914 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004915 firstIx = lastIx = 0;
4916 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004917 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004918 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00004919 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00004920 );
sewardj8ab2c132009-08-02 09:34:35 +00004921 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004922 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004923 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004924 confThr, confTym, found, firstIx, lastIx);
4925 /* We can't indefinitely collect stack traces at VTS
4926 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004927 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004928 ones, which in turn means we might fail to find index value
4929 confTym in the array. */
4930 if (found) {
4931 ULong_n_EC *pair_start, *pair_end;
4932 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004933 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004934 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004935 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004936 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004937 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004938 lastIx+1 );
4939 /* from properties of VG_(lookupXA) and the comparison fn used: */
4940 tl_assert(pair_start->ull < pair_end->ull);
4941 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004942 /* Could do a bit better here. It may be that pair_end
4943 doesn't have a stack, but the following entries in the
4944 array have the same scalar Kw and do have a stack. So
4945 we should search a bit further along the array than
4946 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004947 } else {
sewardjffce8152011-06-24 10:09:41 +00004948 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00004949 hist1_seg_end = main_get_EC( confThr );
4950 }
4951 // seg_start could be NULL iff this is the first stack in the thread
4952 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4953 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00004954 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00004955 }
4956 }
4957
sewardj60626642011-03-10 15:14:37 +00004958 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004959 szB, isWrite,
4960 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004961}
4962
4963static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004964 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004965 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004966 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4967 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004968}
4969
4970
4971/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004972static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004973 /* The following are only needed for
4974 creating error reports. */
4975 Thr* acc_thr,
4976 Addr acc_addr, SizeT szB )
4977{
4978 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004979 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004980
4981 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004982 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004983 tl_assert(is_sane_SVal_C(svOld));
4984 }
4985
sewardj1c0ce7a2009-07-01 08:10:49 +00004986 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004987 VtsID tviR = acc_thr->viR;
4988 VtsID tviW = acc_thr->viW;
4989 VtsID rmini = SVal__unC_Rmin(svOld);
4990 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004991 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4992 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004993 /* no race */
4994 /* Note: RWLOCK subtlety: use tviW, not tviR */
4995 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4996 goto out;
4997 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004998 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004999 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5000 tl_assert(leqxx);
5001 // same as in non-race case
5002 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5003 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005004 rmini, /* Cfailed */
5005 tviR, /* Kfailed */
5006 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005007 goto out;
5008 }
5009 }
5010 if (SVal__isA(svOld)) {
5011 /* reading no-access memory (sigh); leave unchanged */
5012 /* check for no pollution */
5013 tl_assert(svOld == SVal_NOACCESS);
5014 svNew = SVal_NOACCESS;
5015 goto out;
5016 }
sewardj23f12002009-07-24 08:45:08 +00005017 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005018 tl_assert(0);
5019
5020 out:
sewardj8f5374e2008-12-07 11:40:17 +00005021 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005022 tl_assert(is_sane_SVal_C(svNew));
5023 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005024 if (UNLIKELY(svNew != svOld)) {
5025 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005026 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005027 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005028 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005029 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005030 }
5031 }
5032 return svNew;
5033}
5034
5035
5036/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005037static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005038 /* The following are only needed for
5039 creating error reports. */
5040 Thr* acc_thr,
5041 Addr acc_addr, SizeT szB )
5042{
5043 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005044 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005045
5046 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005047 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005048 tl_assert(is_sane_SVal_C(svOld));
5049 }
5050
sewardj1c0ce7a2009-07-01 08:10:49 +00005051 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005052 VtsID tviW = acc_thr->viW;
5053 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005054 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5055 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005056 /* no race */
5057 svNew = SVal__mkC( tviW, tviW );
5058 goto out;
5059 } else {
5060 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005061 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005062 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5063 tl_assert(leqxx);
5064 // same as in non-race case
5065 // proof: in the non-race case, we have
5066 // rmini <= wmini (invar on constraints)
5067 // tviW <= tviR (invar on thread clocks)
5068 // wmini <= tviW (from run-time check)
5069 // hence from transitivity of <= we have
5070 // rmini <= wmini <= tviW
5071 // and so join(rmini,tviW) == tviW
5072 // and join(wmini,tviW) == tviW
5073 // qed.
5074 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5075 VtsID__join2(wmini, tviW) );
5076 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005077 wmini, /* Cfailed */
5078 tviW, /* Kfailed */
5079 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005080 goto out;
5081 }
5082 }
5083 if (SVal__isA(svOld)) {
5084 /* writing no-access memory (sigh); leave unchanged */
5085 /* check for no pollution */
5086 tl_assert(svOld == SVal_NOACCESS);
5087 svNew = SVal_NOACCESS;
5088 goto out;
5089 }
sewardj23f12002009-07-24 08:45:08 +00005090 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005091 tl_assert(0);
5092
5093 out:
sewardj8f5374e2008-12-07 11:40:17 +00005094 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005095 tl_assert(is_sane_SVal_C(svNew));
5096 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005097 if (UNLIKELY(svNew != svOld)) {
5098 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005099 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005100 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005101 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005102 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005103 }
5104 }
5105 return svNew;
5106}
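/* A concrete instance of the proof in msmcwrite's race branch, as a
   sketch with two-entry clocks (illustrative only; the accessing
   thread is entry 0):

      rmini = {1,0}, wmini = {1,2}, tviW = {3,2}

   Here rmini <= wmini (the constraint invariant) and wmini <= tviW
   (the no-race condition), and indeed

      join(rmini, tviW) = {max(1,3), max(0,2)} = {3,2} = tviW
      join(wmini, tviW) = {max(1,3), max(2,2)} = {3,2} = tviW

   so SVal__mkC(join(rmini,tviW), join(wmini,tviW)) collapses to
   SVal__mkC(tviW, tviW), exactly the non-race result, which is why
   the race branch can reuse the same formula. */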
5107
5108
5109/////////////////////////////////////////////////////////
5110// //
5111// Apply core MSM to specific memory locations //
5112// //
5113/////////////////////////////////////////////////////////
5114
sewardj23f12002009-07-24 08:45:08 +00005115/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005116
sewardj23f12002009-07-24 08:45:08 +00005117static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005118 CacheLine* cl;
5119 UWord cloff, tno, toff;
5120 SVal svOld, svNew;
5121 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005122 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005123 cl = get_cacheline(a);
5124 cloff = get_cacheline_offset(a);
5125 tno = get_treeno(a);
5126 toff = get_tree_offset(a); /* == 0 .. 7 */
5127 descr = cl->descrs[tno];
5128 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5129 SVal* tree = &cl->svals[tno << 3];
5130 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005131 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005132 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5133 }
5134 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005135 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005136 if (CHECK_ZSM)
5137 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005138 cl->svals[cloff] = svNew;
5139}
5140
sewardj23f12002009-07-24 08:45:08 +00005141static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005142 CacheLine* cl;
5143 UWord cloff, tno, toff;
5144 SVal svOld, svNew;
5145 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005146 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005147 cl = get_cacheline(a);
5148 cloff = get_cacheline_offset(a);
5149 tno = get_treeno(a);
5150 toff = get_tree_offset(a); /* == 0 .. 7 */
5151 descr = cl->descrs[tno];
5152 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5153 SVal* tree = &cl->svals[tno << 3];
5154 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005155 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005156 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5157 }
5158 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005159 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005160 if (CHECK_ZSM)
5161 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005162 cl->svals[cloff] = svNew;
5163}
5164
sewardj23f12002009-07-24 08:45:08 +00005165/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005166
sewardj23f12002009-07-24 08:45:08 +00005167static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005168 CacheLine* cl;
5169 UWord cloff, tno, toff;
5170 SVal svOld, svNew;
5171 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005172 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005173 if (UNLIKELY(!aligned16(a))) goto slowcase;
5174 cl = get_cacheline(a);
5175 cloff = get_cacheline_offset(a);
5176 tno = get_treeno(a);
5177 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5178 descr = cl->descrs[tno];
5179 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5180 if (valid_value_is_below_me_16(descr, toff)) {
5181 goto slowcase;
5182 } else {
5183 SVal* tree = &cl->svals[tno << 3];
5184 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5185 }
sewardj8f5374e2008-12-07 11:40:17 +00005186 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005187 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5188 }
5189 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005190 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005191 if (CHECK_ZSM)
5192 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005193 cl->svals[cloff] = svNew;
5194 return;
5195 slowcase: /* misaligned, or must go further down the tree */
5196 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005197 zsm_sapply08__msmcread( thr, a + 0 );
5198 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005199}
5200
sewardj23f12002009-07-24 08:45:08 +00005201static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005202 CacheLine* cl;
5203 UWord cloff, tno, toff;
5204 SVal svOld, svNew;
5205 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005206 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005207 if (UNLIKELY(!aligned16(a))) goto slowcase;
5208 cl = get_cacheline(a);
5209 cloff = get_cacheline_offset(a);
5210 tno = get_treeno(a);
5211 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5212 descr = cl->descrs[tno];
5213 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5214 if (valid_value_is_below_me_16(descr, toff)) {
5215 goto slowcase;
5216 } else {
5217 SVal* tree = &cl->svals[tno << 3];
5218 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5219 }
sewardj8f5374e2008-12-07 11:40:17 +00005220 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005221 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5222 }
5223 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005224 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005225 if (CHECK_ZSM)
5226 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005227 cl->svals[cloff] = svNew;
5228 return;
5229 slowcase: /* misaligned, or must go further down the tree */
5230 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005231 zsm_sapply08__msmcwrite( thr, a + 0 );
5232 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005233}
5234
sewardj23f12002009-07-24 08:45:08 +00005235/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005236
sewardj23f12002009-07-24 08:45:08 +00005237static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005238 CacheLine* cl;
5239 UWord cloff, tno, toff;
5240 SVal svOld, svNew;
5241 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005242 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005243 if (UNLIKELY(!aligned32(a))) goto slowcase;
5244 cl = get_cacheline(a);
5245 cloff = get_cacheline_offset(a);
5246 tno = get_treeno(a);
5247 toff = get_tree_offset(a); /* == 0 or 4 */
5248 descr = cl->descrs[tno];
5249 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5250 if (valid_value_is_above_me_32(descr, toff)) {
5251 SVal* tree = &cl->svals[tno << 3];
5252 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5253 } else {
5254 goto slowcase;
5255 }
sewardj8f5374e2008-12-07 11:40:17 +00005256 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005257 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5258 }
5259 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005260 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005261 if (CHECK_ZSM)
5262 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005263 cl->svals[cloff] = svNew;
5264 return;
5265 slowcase: /* misaligned, or must go further down the tree */
5266 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005267 zsm_sapply16__msmcread( thr, a + 0 );
5268 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005269}
5270
sewardj23f12002009-07-24 08:45:08 +00005271static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005272 CacheLine* cl;
5273 UWord cloff, tno, toff;
5274 SVal svOld, svNew;
5275 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005276 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005277 if (UNLIKELY(!aligned32(a))) goto slowcase;
5278 cl = get_cacheline(a);
5279 cloff = get_cacheline_offset(a);
5280 tno = get_treeno(a);
5281 toff = get_tree_offset(a); /* == 0 or 4 */
5282 descr = cl->descrs[tno];
5283 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5284 if (valid_value_is_above_me_32(descr, toff)) {
5285 SVal* tree = &cl->svals[tno << 3];
5286 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5287 } else {
5288 goto slowcase;
5289 }
sewardj8f5374e2008-12-07 11:40:17 +00005290 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005291 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5292 }
5293 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005294 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005295 if (CHECK_ZSM)
5296 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005297 cl->svals[cloff] = svNew;
5298 return;
5299 slowcase: /* misaligned, or must go further down the tree */
5300 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005301 zsm_sapply16__msmcwrite( thr, a + 0 );
5302 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005303}
5304
sewardj23f12002009-07-24 08:45:08 +00005305/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005306
sewardj23f12002009-07-24 08:45:08 +00005307static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005308 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005309 UWord cloff, tno;
5310 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005311 SVal svOld, svNew;
5312 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005313 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005314 if (UNLIKELY(!aligned64(a))) goto slowcase;
5315 cl = get_cacheline(a);
5316 cloff = get_cacheline_offset(a);
5317 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005318 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005319 descr = cl->descrs[tno];
5320 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5321 goto slowcase;
5322 }
5323 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005324 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005325 if (CHECK_ZSM)
5326 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005327 cl->svals[cloff] = svNew;
5328 return;
5329 slowcase: /* misaligned, or must go further down the tree */
5330 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005331 zsm_sapply32__msmcread( thr, a + 0 );
5332 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005333}
5334
sewardj23f12002009-07-24 08:45:08 +00005335static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005336 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005337 UWord cloff, tno;
5338 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005339 SVal svOld, svNew;
5340 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005341 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005342 if (UNLIKELY(!aligned64(a))) goto slowcase;
5343 cl = get_cacheline(a);
5344 cloff = get_cacheline_offset(a);
5345 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005346 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005347 descr = cl->descrs[tno];
5348 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5349 goto slowcase;
5350 }
5351 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005352 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005353 if (CHECK_ZSM)
5354 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005355 cl->svals[cloff] = svNew;
5356 return;
5357 slowcase: /* misaligned, or must go further down the tree */
5358 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005359 zsm_sapply32__msmcwrite( thr, a + 0 );
5360 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005361}
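/* How the slowcases above cascade, as a worked example
   (illustrative only): a 64-bit write at a == 0x1002, which is
   2-aligned but neither 4- nor 8-aligned, and assuming the tree
   descriptors then permit 16-bit access without further splits:

      zsm_sapply64__msmcwrite(0x1002)        -- !aligned64, split
         zsm_sapply32__msmcwrite(0x1002)     -- !aligned32, split
            zsm_sapply16__msmcwrite(0x1002)  -- aligned16, applied
            zsm_sapply16__msmcwrite(0x1004)  -- aligned16, applied
         zsm_sapply32__msmcwrite(0x1006)     -- !aligned32, split
            zsm_sapply16__msmcwrite(0x1006)  -- aligned16, applied
            zsm_sapply16__msmcwrite(0x1008)  -- aligned16, applied

   So a misaligned 8-byte access costs at worst eight 8-bit
   applications, and each split is recorded in the
   stats__cline_*splits counters. */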
5362
sewardj23f12002009-07-24 08:45:08 +00005363/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005364
5365static
sewardj23f12002009-07-24 08:45:08 +00005366void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005367 CacheLine* cl;
5368 UWord cloff, tno, toff;
5369 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005370 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005371 cl = get_cacheline(a);
5372 cloff = get_cacheline_offset(a);
5373 tno = get_treeno(a);
5374 toff = get_tree_offset(a); /* == 0 .. 7 */
5375 descr = cl->descrs[tno];
5376 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5377 SVal* tree = &cl->svals[tno << 3];
5378 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005379 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005380 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5381 }
5382 tl_assert(svNew != SVal_INVALID);
5383 cl->svals[cloff] = svNew;
5384}
5385
sewardj23f12002009-07-24 08:45:08 +00005386/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005387
5388static
sewardj23f12002009-07-24 08:45:08 +00005389void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005390 CacheLine* cl;
5391 UWord cloff, tno, toff;
5392 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005393 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005394 if (UNLIKELY(!aligned16(a))) goto slowcase;
5395 cl = get_cacheline(a);
5396 cloff = get_cacheline_offset(a);
5397 tno = get_treeno(a);
5398 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5399 descr = cl->descrs[tno];
5400 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5401 if (valid_value_is_below_me_16(descr, toff)) {
5402 /* Writing at this level. Need to fix up 'descr'. */
5403 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5404 /* At this point, the tree does not match cl->descrs[tno] any
5405 more. The assignments below will fix it up. */
5406 } else {
5407 /* We can't indiscriminately write on the w16 node as in the
5408 w64 case, as that might make the node inconsistent with
5409 its parent. So first, pull down to this level. */
5410 SVal* tree = &cl->svals[tno << 3];
5411 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005412 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005413 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5414 }
5415 }
5416 tl_assert(svNew != SVal_INVALID);
5417 cl->svals[cloff + 0] = svNew;
5418 cl->svals[cloff + 1] = SVal_INVALID;
5419 return;
5420 slowcase: /* misaligned */
5421 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005422 zsm_swrite08( a + 0, svNew );
5423 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005424}
5425
sewardj23f12002009-07-24 08:45:08 +00005426/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005427
5428static
sewardj23f12002009-07-24 08:45:08 +00005429void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005430 CacheLine* cl;
5431 UWord cloff, tno, toff;
5432 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005433 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005434 if (UNLIKELY(!aligned32(a))) goto slowcase;
5435 cl = get_cacheline(a);
5436 cloff = get_cacheline_offset(a);
5437 tno = get_treeno(a);
5438 toff = get_tree_offset(a); /* == 0 or 4 */
5439 descr = cl->descrs[tno];
5440 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5441 if (valid_value_is_above_me_32(descr, toff)) {
5442 /* We can't indiscriminately write on the w32 node as in the
5443 w64 case, as that might make the node inconsistent with
5444 its parent. So first, pull down to this level. */
5445 SVal* tree = &cl->svals[tno << 3];
5446 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005447 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005448 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5449 } else {
5450 /* Writing at this level. Need to fix up 'descr'. */
5451 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5452 /* At this point, the tree does not match cl->descrs[tno] any
5453 more. The assignments below will fix it up. */
5454 }
5455 }
5456 tl_assert(svNew != SVal_INVALID);
5457 cl->svals[cloff + 0] = svNew;
5458 cl->svals[cloff + 1] = SVal_INVALID;
5459 cl->svals[cloff + 2] = SVal_INVALID;
5460 cl->svals[cloff + 3] = SVal_INVALID;
5461 return;
5462 slowcase: /* misaligned */
5463 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005464 zsm_swrite16( a + 0, svNew );
5465 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005466}
5467
sewardj23f12002009-07-24 08:45:08 +00005468/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005469
5470static
sewardj23f12002009-07-24 08:45:08 +00005471void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005472 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005473 UWord cloff, tno;
5474 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005475 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005476 if (UNLIKELY(!aligned64(a))) goto slowcase;
5477 cl = get_cacheline(a);
5478 cloff = get_cacheline_offset(a);
5479 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005480 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005481 cl->descrs[tno] = TREE_DESCR_64;
5482 tl_assert(svNew != SVal_INVALID);
5483 cl->svals[cloff + 0] = svNew;
5484 cl->svals[cloff + 1] = SVal_INVALID;
5485 cl->svals[cloff + 2] = SVal_INVALID;
5486 cl->svals[cloff + 3] = SVal_INVALID;
5487 cl->svals[cloff + 4] = SVal_INVALID;
5488 cl->svals[cloff + 5] = SVal_INVALID;
5489 cl->svals[cloff + 6] = SVal_INVALID;
5490 cl->svals[cloff + 7] = SVal_INVALID;
5491 return;
5492 slowcase: /* misaligned */
5493 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005494 zsm_swrite32( a + 0, svNew );
5495 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005496}
5497
sewardj23f12002009-07-24 08:45:08 +00005498/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005499
5500static
sewardj23f12002009-07-24 08:45:08 +00005501SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005502 CacheLine* cl;
5503 UWord cloff, tno, toff;
5504 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005505 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005506 cl = get_cacheline(a);
5507 cloff = get_cacheline_offset(a);
5508 tno = get_treeno(a);
5509 toff = get_tree_offset(a); /* == 0 .. 7 */
5510 descr = cl->descrs[tno];
5511 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5512 SVal* tree = &cl->svals[tno << 3];
5513 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5514 }
5515 return cl->svals[cloff];
5516}
5517
sewardj23f12002009-07-24 08:45:08 +00005518static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005519 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005520 stats__cline_scopy08s++;
5521 sv = zsm_sread08( src );
5522 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005523}
5524
5525
sewardj23f12002009-07-24 08:45:08 +00005526/* Block-copy states (needed for implementing realloc()). Note this
5527 doesn't change the filtering arrangements. The caller of
5528 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005529
sewardj23f12002009-07-24 08:45:08 +00005530static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005531{
5532 SizeT i;
5533 if (len == 0)
5534 return;
5535
5536 /* assert for non-overlappingness */
5537 tl_assert(src+len <= dst || dst+len <= src);
5538
5539 /* To be simple, just copy byte by byte. But so as not to wreck
5540 performance for later accesses to dst[0 .. len-1], normalise
5541 destination lines as we finish with them, and also normalise the
5542 line containing the first and last address. */
5543 for (i = 0; i < len; i++) {
5544 Bool normalise
5545 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5546 || i == 0 /* first in range */
5547 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005548 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005549 }
5550}
5551
5552
5553/* For setting address ranges to a given value. Has considerable
5554 sophistication so as to avoid generating large numbers of pointless
5555 cache loads/writebacks for large ranges. */
5556
5557/* Do small ranges in-cache, in the obvious way. */
5558static
sewardj23f12002009-07-24 08:45:08 +00005559void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005560{
5561 /* fast track a couple of common cases */
5562 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005563 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005564 return;
5565 }
5566 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005567 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005568 return;
5569 }
5570
5571 /* be completely general (but as efficient as possible) */
5572 if (len == 0) return;
5573
5574 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005575 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005576 a += 1;
5577 len -= 1;
5578 tl_assert(aligned16(a));
5579 }
5580 if (len == 0) return;
5581
5582 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005583 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005584 a += 2;
5585 len -= 2;
5586 tl_assert(aligned32(a));
5587 }
5588 if (len == 0) return;
5589
5590 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005591 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005592 a += 4;
5593 len -= 4;
5594 tl_assert(aligned64(a));
5595 }
5596 if (len == 0) return;
5597
5598 if (len >= 8) {
5599 tl_assert(aligned64(a));
5600 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005601 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005602 a += 8;
5603 len -= 8;
5604 }
5605 tl_assert(aligned64(a));
5606 }
5607 if (len == 0) return;
5608
5609 if (len >= 4)
5610 tl_assert(aligned32(a));
5611 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005612 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005613 a += 4;
5614 len -= 4;
5615 }
5616 if (len == 0) return;
5617
5618 if (len >= 2)
5619 tl_assert(aligned16(a));
5620 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005621 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005622 a += 2;
5623 len -= 2;
5624 }
5625 if (len == 0) return;
5626
5627 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005628 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005629 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005630 len -= 1;
5631 }
5632 tl_assert(len == 0);
5633}
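/* The function above is an instance of a general alignment ladder:
   peel off small pieces until the cursor is 8-aligned, do the bulk
   with the widest store, then step back down for the tail. A
   stand-alone sketch of the same control flow, with the actual
   stores abstracted as hypothetical stubs set8/set16/set32/set64
   (illustrative only, guarded out of compilation): */
#if 0
static void set8  ( Addr a ) { /* 1-byte store */ }
static void set16 ( Addr a ) { /* 2-byte store, a 2-aligned */ }
static void set32 ( Addr a ) { /* 4-byte store, a 4-aligned */ }
static void set64 ( Addr a ) { /* 8-byte store, a 8-aligned */ }

static void set_range ( Addr a, SizeT len )
{
   if (len >= 1 && (a & 1)) { set8 (a); a += 1; len -= 1; } /* to 2-align */
   if (len >= 2 && (a & 2)) { set16(a); a += 2; len -= 2; } /* to 4-align */
   if (len >= 4 && (a & 4)) { set32(a); a += 4; len -= 4; } /* to 8-align */
   while (len >= 8)         { set64(a); a += 8; len -= 8; } /* bulk */
   if (len >= 4)            { set32(a); a += 4; len -= 4; } /* tail */
   if (len >= 2)            { set16(a); a += 2; len -= 2; }
   if (len >= 1)            { set8 (a);                   }
}
#endif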
5634
5635
sewardj23f12002009-07-24 08:45:08 +00005636/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005637 for larger ranges, try to operate directly on the out-of-cache
5638 representation, rather than dragging lines into the cache,
5639 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005640 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005641
sewardj23f12002009-07-24 08:45:08 +00005642 Note that this doesn't change the filtering arrangements. The
5643 caller of zsm_sset_range needs to attend to that. */
5644
static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr before_start = a;
      Addr aligned_start = cacheline_ROUNDUP(a);
      Addr after_start = cacheline_ROUNDDN(a + len);
      UWord before_len = aligned_start - before_start;
      UWord aligned_len = after_start - aligned_start;
      UWord after_len = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//              Front-filtering accesses               //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//              Synchronisation objects                //
//                                                     //
/////////////////////////////////////////////////////////

/* A doubly linked list of all the SOs. */
SO* admin_SO = NULL;

static SO* SO__Alloc ( void )
{
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR = VtsID_INVALID;
   so->viW = VtsID_INVALID;
   so->magic = SO_MAGIC;
   /* Add to the doubly linked list. */
   if (admin_SO) {
      tl_assert(admin_SO->admin_prev == NULL);
      admin_SO->admin_prev = so;
      so->admin_next = admin_SO;
   } else {
      so->admin_next = NULL;
   }
   so->admin_prev = NULL;
   admin_SO = so;
   /* */
   return so;
}

static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   /* Delete from the doubly linked list. */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (so == admin_SO)
      admin_SO = so->admin_next;
   /* */
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//                  Top Level API                      //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr* thr;
   VtsID vi;

   // We will have to store a large number of these,
   // so make sure they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);

   /* because first 1024 unusable */
   tl_assert(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 3 bits to spare (see defn of
      Thr_n_RCEC). */
   tl_assert(SCALARTS_N_THRBITS <= 29);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   if (sizeof(UWord) == 8) {
      tl_assert(sizeof(Thr_n_RCEC) == 16);
   } else {
      tl_assert(sizeof(Thr_n_RCEC) == 12);
   }

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   tl_assert(sizeof(WordSetID) == 4);
   tl_assert(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( );

   thr = Thr__new();
   vi = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state(" root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
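   /* Illustrative example (not from the original source): if
      parent->viR is the vector clock { parent:3 }, then below
      child->viR becomes { parent:3, child:1 }, and after the parent's
      own tick further down, parent->viR becomes { parent:4 }.  Every
      event in the child therefore happens-after all of the parent's
      actions up to the fork point. */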
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu in map (can be scanGCed %'5lu)"
                  " #%lu scanGC \n",
                  stats__secmaps_in_map_shmem,
                  shmem__SecMap_do_GC(False /* really do GC */),
                  stats__secmaps_scanGC);
      tl_assert (VG_(sizeFM) (map_shmem) == stats__secmaps_in_map_shmem);
      VG_(printf)(" secmaps: %'10lu in freelist,"
                  " total (scanGCed %'lu, ssetGCed %'lu)\n",
                  SecMap_freelist_length(),
                  stats__secmaps_scanGCed,
                  stats__secmaps_ssetGCed);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)(" cache: %'14lu flushes_invals\n",
                  stats__cache_flushes_invals );
      VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)(" cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu"
                  " 2to1 %'12lu\n",
                  stats__cline_64to32splits, stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu"
                  " 2to1 %'12lu\n",
                  stats__cline_64to32pulldown, stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)(" cline: sizeof(CacheLineZ) %ld,"
                     " covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ),
                     (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)(" libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
                  stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)(" libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                  stats__vts__cmp_structural, stats__vts__cmp_structural_slow);
      VG_(printf)(" libhb: VTSset: find__or__clone_and_add %'lu"
                  " (%'lu allocd)\n",
                  stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)(" libhb: #%lu vts_tab GC\n", stats__vts_tab_GC);
      VG_(printf)( " libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)(" libhb: threads live: %d exit_and_joinedwith %d"
                     " exit %d joinedwith %d\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)(" libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      {
         UWord OldRef_accs_n[N_OLDREF_ACCS+1];
         UInt accs_n;
         UWord OldRef_n;
         UInt i;

         OldRef_n = 0;
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            OldRef_accs_n[i] = 0;

         for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
            OldRef_n++;
            accs_n = 0;
            for (i = 0; i < N_OLDREF_ACCS; i++) {
               if (o->accs[i].thrid != 0)
                  accs_n++;
            }
            OldRef_accs_n[accs_n]++;
         }

         tl_assert(OldRef_n == oldrefTreeN);
         VG_(printf)( " libhb: oldrefTreeN %lu ", oldrefTreeN);
         VG_(printf)( "( ");
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
         VG_(printf)( ")\n");
      }
      VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( " libhb: contextTab: %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
      {
#        define MAXCHAIN 10
         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
         UInt non0chain = 0;
         UInt n;
         UInt i;
         RCEC *p;

         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
         for (i = 0; i < N_RCEC_TAB; i++) {
            n = 0;
            for (p = contextTab[i]; p; p = p->next)
               n++;
            if (n < MAXCHAIN)
               chains[n]++;
            else
               chains[MAXCHAIN]++;
            if (n > 0)
               non0chain++;
         }
         VG_(printf)( " libhb: contextTab chain of [length]=nchain."
                      " Avg chain len %3.1f\n"
                      " ",
                      (Double)stats__ctxt_tab_curr
                      / (Double)(non0chain ? non0chain : 1));
         for (i = 0; i <= MAXCHAIN; i++) {
            if (chains[i] != 0)
               VG_(printf)( "[%d%s]=%d ",
                            i, i == MAXCHAIN ? "+" : "",
                            chains[i]);
         }
         VG_(printf)( "\n");
#        undef MAXCHAIN
      }
      VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

/* Receive notification that a thread has low level exited.  The
   significance here is that we do not expect to see any more memory
   references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen if the
      thread has both ll_exited and been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Receive notification that a thread has been joined with.  The
   significance here is that we do not expect to see any further
   references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}


/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
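
/* Illustrative usage (a sketch, not code from this file): to model a
   simple message-passing edge, the posting thread sends its clocks
   into an SO and the receiving thread acquires them:

      SO* so = libhb_so_alloc();
      libhb_so_send( thrA, so, True );   // thrA posts (strong send)
      libhb_so_recv( thrB, so, True );   // thrB acquires (strong recv)

   after which everything thrA did before the send happens-before
   everything thrB does after the receive. */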
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}


/* Set the lines zix_start to zix_end, inclusive, to NOACCESS. */
static void zsm_secmap_line_range_noaccess (SecMap *sm,
                                            UInt zix_start, UInt zix_end)
{
   for (UInt lz = zix_start; lz <= zix_end; lz++) {
      LineZ* lineZ;
      LineF* lineF;
      lineZ = &sm->linesZ[lz];
      if (lineZ->dict[0] != SVal_INVALID) {
         rcdec_LineZ(lineZ);
      } else {
         UInt fix = (UInt)lineZ->dict[1];
         tl_assert(sm->linesF);
         tl_assert(sm->linesF_size > 0);
         tl_assert(fix >= 0 && fix < sm->linesF_size);
         lineF = &sm->linesF[fix];
         rcdec_LineF(lineF);
         lineF->inUse = False;
      }
      lineZ->dict[0] = SVal_NOACCESS;
      lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      for (UInt i = 0; i < N_LINE_ARANGE/4; i++)
         lineZ->ix2s[i] = 0; /* all refer to dict[0] */
   }
}

/* Set the given range to SVal_NOACCESS in-place in the secmap.
   a must be cacheline aligned.  len must be a multiple of a cacheline
   and must be < N_SECMAP_ARANGE. */
static void zsm_sset_range_noaccess_in_secmap(Addr a, SizeT len)
{
   tl_assert (is_valid_scache_tag (a));
   tl_assert (0 == (len & (N_LINE_ARANGE - 1)));
   tl_assert (len < N_SECMAP_ARANGE);

   SecMap *sm1 = shmem__find_SecMap (a);
   SecMap *sm2 = shmem__find_SecMap (a + len - 1);
   UWord zix_start = shmem__get_SecMap_offset(a          ) >> N_LINE_BITS;
   UWord zix_end   = shmem__get_SecMap_offset(a + len - 1) >> N_LINE_BITS;

   if (sm1) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm1));
      zsm_secmap_line_range_noaccess (sm1, zix_start,
                                      sm1 == sm2 ? zix_end : N_SECMAP_ZLINES-1);
   }
   if (sm2 && sm1 != sm2) {
      if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm2));
      zsm_secmap_line_range_noaccess (sm2, 0, zix_end);
   }
}

/* Set the given address range to SVal_NOACCESS.
   SecMaps that become fully NOACCESS are pushed onto the
   SecMap_freelist. */
static void zsm_sset_range_noaccess (Addr addr, SizeT len)
{
   /*
      BPC = Before, Partial Cacheline, = addr
            (i.e. starting inside a cacheline/inside a SecMap)
      BFC = Before, Full Cacheline(s), but not full SecMap
            (i.e. starting inside a SecMap)
      FSM = Full SecMap(s)
            (i.e. starting a SecMap)
      AFC = After, Full Cacheline(s), but not full SecMap
            (i.e. first address after the full SecMap(s))
      APC = After, Partial Cacheline
            (i.e. first address after the full CacheLines)
      ARE = After Range End = addr+len = first address not part of the range.

      If addr starts a Cacheline, then BPC == BFC.
      If addr starts a SecMap, then BPC == BFC == FSM.
      If addr+len starts a SecMap, then APC == ARE == AFC.
      If addr+len starts a Cacheline, then APC == ARE.
   */
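   /* Worked example (illustrative; assumes N_LINE_ARANGE == 64 and
      N_SECMAP_ARANGE == 8192): for addr = 0x1E50 and len = 0x5000,
         BPC = 0x1E50, BPClen = 0x30   (partial line, via the cache)
         BFC = 0x1E80, BFClen = 0x180  (whole lines, set in place)
         FSM = 0x2000, FSMlen = 0x4000 (two whole SecMaps, freed)
         AFC = 0x6000, AFClen = 0xE40  (whole lines, set in place)
         APC = 0x6E40, APClen = 0x10   (partial line, via the cache)
         ARE = 0x6E50
      and the five piece lengths sum back to 0x5000. */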
   Addr ARE = addr + len;
   Addr BPC = addr;
   Addr BFC = ROUNDUP(BPC, N_LINE_ARANGE);
   Addr FSM = ROUNDUP(BPC, N_SECMAP_ARANGE);
   Addr AFC = ROUNDDN(ARE, N_SECMAP_ARANGE);
   Addr APC = ROUNDDN(ARE, N_LINE_ARANGE);
   SizeT Plen = len; // Plen will be split between the following:
   SizeT BPClen;
   SizeT BFClen;
   SizeT FSMlen;
   SizeT AFClen;
   SizeT APClen;

   /* Consumes from Plen the nr of bytes between from and to.
      from and to must be aligned on a multiple of round.
      The length consumed will be a multiple of round, with
      a maximum of Plen. */
#  define PlenCONSUME(from, to, round, consumed) \
   do {                                          \
      if (from < to) {                           \
         if (to - from < Plen)                   \
            consumed = to - from;                \
         else                                    \
            consumed = ROUNDDN(Plen, round);     \
      } else {                                   \
         consumed = 0;                           \
      }                                          \
      Plen -= consumed; } while (0)

   PlenCONSUME(BPC, BFC, 1, BPClen);
   PlenCONSUME(BFC, FSM, N_LINE_ARANGE, BFClen);
   PlenCONSUME(FSM, AFC, N_SECMAP_ARANGE, FSMlen);
   PlenCONSUME(AFC, APC, N_LINE_ARANGE, AFClen);
   PlenCONSUME(APC, ARE, 1, APClen);

   if (0)
      VG_(printf) ("addr %p[%ld] ARE %p"
                   " BPC %p[%ld] BFC %p[%ld] FSM %p[%ld]"
                   " AFC %p[%ld] APC %p[%ld]\n",
                   (void*)addr, len, (void*)ARE,
                   (void*)BPC, BPClen, (void*)BFC, BFClen, (void*)FSM, FSMlen,
                   (void*)AFC, AFClen, (void*)APC, APClen);

   tl_assert (Plen == 0);

   /* Set to NOACCESS pieces before and after not covered by entire SecMaps. */

   /* First we set the partial cachelines.  This is done through the cache. */
   if (BPClen > 0)
      zsm_sset_range_SMALL (BPC, BPClen, SVal_NOACCESS);
   if (APClen > 0)
      zsm_sset_range_SMALL (APC, APClen, SVal_NOACCESS);

   /* After this, we will not use the cache anymore.  We will directly work
      in-place on the z shadow memory in SecMap(s).
      So, we invalidate the cachelines for the whole range we are setting
      to NOACCESS below. */
   shmem__invalidate_scache_range (BFC, APC - BFC);

   if (BFClen > 0)
      zsm_sset_range_noaccess_in_secmap (BFC, BFClen);
   if (AFClen > 0)
      zsm_sset_range_noaccess_in_secmap (AFC, AFClen);

   if (FSMlen > 0) {
      /* Set to NOACCESS all the SecMaps, pushing the SecMaps to the
         free list. */
      Addr sm_start = FSM;
      while (sm_start < AFC) {
         SecMap *sm = shmem__find_SecMap (sm_start);
         if (sm) {
            Addr gaKey;
            SecMap *fm_sm;

            if (CHECK_ZSM) tl_assert(is_sane_SecMap(sm));
            for (UInt lz = 0; lz < N_SECMAP_ZLINES; lz++) {
               if (sm->linesZ[lz].dict[0] != SVal_INVALID)
                  rcdec_LineZ(&sm->linesZ[lz]);
            }
            for (UInt lf = 0; lf < sm->linesF_size; lf++) {
               if (sm->linesF[lf].inUse)
                  rcdec_LineF (&sm->linesF[lf]);
            }
            if (sm->linesF_size > 0) {
               HG_(free)(sm->linesF);
               stats__secmap_linesF_allocd -= sm->linesF_size;
               stats__secmap_linesF_bytes -= sm->linesF_size * sizeof(LineF);
            }
            if (!VG_(delFromFM)(map_shmem, &gaKey, (UWord*)&fm_sm, sm_start))
               tl_assert (0);
            stats__secmaps_in_map_shmem--;
            tl_assert (gaKey == sm_start);
            tl_assert (sm == fm_sm);
            stats__secmaps_ssetGCed++;
            push_SecMap_on_freelist (sm);
         }
         sm_start += N_SECMAP_ARANGE;
      }
      tl_assert (sm_start == AFC);

      /* The above loop might have kept copies of freed SecMaps in the
         smCache, so clear them. */
      if (address_in_range(smCache[0].gaKey, FSM, FSMlen)) {
         smCache[0].gaKey = 1;
         smCache[0].sm = NULL;
      }
      if (address_in_range(smCache[1].gaKey, FSM, FSMlen)) {
         smCache[1].gaKey = 1;
         smCache[1].sm = NULL;
      }
      if (address_in_range(smCache[2].gaKey, FSM, FSMlen)) {
         smCache[2].gaKey = 1;
         smCache[2].sm = NULL;
      }
      STATIC_ASSERT (3 == sizeof(smCache)/sizeof(SMCacheEnt));
   }
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
}

/* Works byte at a time.  Can be optimised if needed. */
UWord libhb_srange_get_abits (Addr a, UChar *abits, SizeT len)
{
   UWord anr = 0; // nr of bytes addressable.

   /* Get the accessibility of each byte.  Take care not to create a
      SecMap or a LineZ when checking whether a byte is addressable.

      Note: this is used for client requests, so performance is deemed
      not critical.  For simplicity, we work byte by byte.
      Performance could be improved by working with full cachelines
      or with full SecMaps, when reaching a cacheline or secmap
      boundary. */
   for (SizeT i = 0; i < len; i++) {
      SVal sv = SVal_INVALID;
      Addr b = a + i;
      Addr tag = b & ~(N_LINE_ARANGE - 1);
      UWord wix = (b >> N_LINE_BITS) & (N_WAY_NENT - 1);
      UWord cloff = get_cacheline_offset(b);

      /* Note: we do not use get_cacheline(b) to avoid creating
         cachelines and/or SecMaps for non-addressable bytes. */
      if (tag == cache_shmem.tags0[wix]) {
         CacheLine copy = cache_shmem.lyns0[wix];
         /* We work on a copy of the cacheline, as we do not want to
            record the client request as a real read.
            The below is somewhat similar to zsm_sapply08__msmcread but
            avoids side effects on the cache. */
         UWord toff = get_tree_offset(b); /* == 0 .. 7 */
         UWord tno = get_treeno(b);
         UShort descr = copy.descrs[tno];
         if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
            SVal* tree = &copy.svals[tno << 3];
            copy.descrs[tno] = pulldown_to_8(tree, toff, descr);
         }
         sv = copy.svals[cloff];
      } else {
         /* Byte not found in the cacheline.  Search for a SecMap. */
         SecMap *sm = shmem__find_SecMap(b);
         LineZ *lineZ;
         if (sm == NULL)
            sv = SVal_NOACCESS;
         else {
            UWord zix = shmem__get_SecMap_offset(b) >> N_LINE_BITS;
            lineZ = &sm->linesZ[zix];
            if (lineZ->dict[0] == SVal_INVALID) {
               UInt fix = (UInt)lineZ->dict[1];
               sv = sm->linesF[fix].w64s[cloff];
            } else {
               UWord ix = read_twobit_array( lineZ->ix2s, cloff );
               sv = lineZ->dict[ix];
            }
         }
      }

      tl_assert (sv != SVal_INVALID);
      if (sv == SVal_NOACCESS) {
         if (abits)
            abits[i] = 0x00;
      } else {
         if (abits)
            abits[i] = 0xff;
         anr++;
      }
   }

   return anr;
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   if (LIKELY(szB < 2 * N_LINE_ARANGE))
      zsm_sset_range_SMALL (a, szB, SVal_NOACCESS);
   else
      zsm_sset_range_noaccess (a, szB);
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
      (1) we have reached a significant nr of RCECs (to avoid scanning
          a contextTab with mostly NULL ptrs)
      and (2) we are approaching the max nr of RCECs (as we have in any
          case at least that many RCECs in the pool allocator).
          Note: the margin allows us to avoid a small but constant
          increase of the max nr of RCECs due to the fact that
          libhb_maybe_GC is not called when the current nr of RCECs
          exactly reaches the max.
      and (3) the nr of referenced RCECs is less than 75% of the total
          nr of RCECs.
      Avoiding excessive growth of the nr of RCECs keeps memory use low
      and avoids having too many elements in the (fixed) contextTab
      hashtable. */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && (stats__ctxt_tab_curr * 3)/4 > RCEC_referenced))
      do_RCEC_GC();

   /* If there are still no entries available (all the table entries
      are full), and we hit the threshold point, then do a GC. */
   Bool vts_tab_GC = vts_tab_freelist == VtsID_INVALID
                     && VG_(sizeXA)( vts_tab ) >= vts_next_GC_at;
   if (UNLIKELY (vts_tab_GC))
      vts_tab__do_GC( False/*don't show stats*/ );

   /* Scan-GC the SecMaps when
      (1) there is no SecMap on the freelist
      and (2) the current nr of live secmaps exceeds the threshold. */
   if (UNLIKELY(SecMap_freelist == NULL
                && stats__secmaps_in_map_shmem >= next_SecMap_GC_at)) {
      // If we did a vts tab GC, then no need to flush the cache again.
      if (!vts_tab_GC)
         zsm_flush_cache();
      shmem__SecMap_do_GC(True);
   }

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION END main library                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end libhb_main.c                                             ---*/
/*--------------------------------------------------------------------*/