
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  Debugging #defines                                         //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1   /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0   /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: VtsID                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: SVal                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

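/* Editor's note: a small illustrative sketch, not part of the
   original code.  The two special values above differ only in the
   top two bits of the 64-bit SVal, which therefore act as a tag:
   3 denotes SVal_INVALID and 2 denotes SVal_NOACCESS, leaving the
   remaining tag values for ordinary shadow values. */
__attribute__((unused))
static inline UWord SVal__topTag_SKETCH ( SVal s ) {
   return (UWord)(s >> 62);   /* 3 == INVALID, 2 == NOACCESS */
}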


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: ScalarTS                                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
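
/* Editor's note: a minimal sketch, not in the original, showing how a
   (thrid, tym) pair is packed into one 64-bit ScalarTS.  With
   SCALARTS_N_THRBITS == 18, thrid can be at most 262143 and tym at
   most 2^46 - 1; respecting those limits is the caller's job. */
__attribute__((unused))
static inline ScalarTS ScalarTS_mk_SKETCH ( ThrID thrid, ULong tym ) {
   ScalarTS st;
   st.thrid = thrid; /* caller must ensure thrid <= ThrID_MAX_VALID */
   st.tym   = tym;   /* caller must ensure tym < (1ULL << SCALARTS_N_TYMBITS) */
   return st;
}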



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: Filter                                         //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* Within each line, each 8-byte chunk is treated individually and is
   mapped to a UShort.  Regardless of endianness of the underlying
   machine, bits 1 and 0 pertain to the lowest address and bits 15
   and 14 to the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

     15 14             ...  01 00

     R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
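
/* Editor's note: a sketch, not in the original, of how an address is
   checked against the filter, tying together FI_GET_TAG,
   FI_GET_LINENO and the R/W bit-pair layout described above.  The
   helper name is hypothetical. */
__attribute__((unused))
static inline Bool Filter__seen_write_SKETCH ( Filter* fi, Addr a ) {
   UWord lineno = FI_GET_LINENO(a);
   if (fi->tags[lineno] != FI_GET_TAG(a))
      return False; /* the direct-mapped slot holds some other line */
   {
      FiLine* line    = &fi->lines[lineno];
      UWord   u16x    = (a & (FI_LINE_SZB - 1)) >> 3; /* which UShort */
      UWord   byteoff = a & 7;                        /* which bit pair */
      /* the W bit is the lower bit of the pair (mask 0x5555) */
      return 0 != (line->u16s[u16x] & (1 << (2 * byteoff)));
   }
}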



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: Thr, ULong_n_EC                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lag behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  data decls: SO                                             //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  Forward declarations                                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );

/* A double linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//  SECTION BEGIN compressed shadow memory                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2. */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2. */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
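
/* Editor's note, a worked example: with N == 8, ROUNDUP(13, 8)
   == (13+7) & ~7 == 16, and ROUNDDN(13, 8) == 13 & ~7 == 8.  Values
   that are already a multiple of N map to themselves in both cases. */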



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
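
/* Editor's note, worked numbers: with N_SECMAP_BITS == 13 and
   N_LINE_BITS == 6, each SecMap covers 8192 bytes of address space
   as 8192/64 == 128 LineZs, so N_SECMAP_ZLINES == 128. */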

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or (when .dict[0] == SVal_INVALID) it holds in
   .dict[1] an index to the LineF in .linesF that holds the full
   representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC 0x571e58cbU

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
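
/* Editor's note: a sketch, not in the original, of how the cache is
   indexed.  It is direct-mapped: the line-number bits of the address
   select the single way-entry that can hold the line, exactly as
   get_cacheline does further down. */
__attribute__((unused))
static inline UWord scache_wix_SKETCH ( Addr a ) {
   return (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
}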


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}
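
/* Editor's note: a sketch, not in the original, combining the two
   helpers above with the LineZ indexing used by the find_* functions
   below.  E.g. a == 0x12345 gives SecMap base 0x12000, offset 0x345,
   LineZ index 13, and byte 5 within that 64-byte line. */
__attribute__((unused))
static inline UWord shmem__get_zix_SKETCH ( Addr a ) {
   return shmem__get_SecMap_offset(a) >> N_LINE_BITS;
}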


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
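
/* Editor's note: a round-trip sketch, not in the original, of the
   two-bit array helpers.  Index 5 lands in arr[1] (5 >> 2 == 1) at
   shift 2, so only bits 3:2 of that byte change. */
__attribute__((unused))
static void twobit_array_demo_SKETCH ( void ) {
   UChar arr[2] = { 0, 0 };
   write_twobit_array( arr, 5, 3 );
   tl_assert( read_twobit_array( arr, 5 ) == 3 );
   tl_assert( read_twobit_array( arr, 4 ) == 0 ); /* neighbour untouched */
}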

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, the current representation is
   rcdec'd, in recognition of the fact that its contents are just
   about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0)  \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |  GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx %s\n", (UWord)validbits, buf2);
   VG_(printf)("   descr     0x%04lx %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
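
/* Editor's note, a worked example: if all eight leaves of a tree hold
   the same SVal, the three merge layers above fire in sequence and
   normalise_tree returns TREE_DESCR_64 (0x0008), leaving only tree[0]
   significant.  Conversely, a tree of eight distinct values keeps
   descr == TREE_DESCR_8_7 | ... | TREE_DESCR_8_0 == 0x7F80. */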

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal  sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */
1540
1541 CacheLine* cl;
1542 Addr* tag_old_p;
1543 Addr tag = a & ~(N_LINE_ARANGE - 1);
1544 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1545
1546 tl_assert(tag != cache_shmem.tags0[wix]);
1547
1548 /* Dump the old line into the backing store. */
1549 stats__cache_totmisses++;
1550
1551 cl = &cache_shmem.lyns0[wix];
1552 tag_old_p = &cache_shmem.tags0[wix];
1553
1554 if (is_valid_scache_tag( *tag_old_p )) {
1555 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001556 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001557 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1558 cacheline_wback( wix );
1559 }
1560 /* and reload the new one */
1561 *tag_old_p = tag;
1562 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001563 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1565 return cl;
1566}
1567
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

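/* Worked example of the pulldowns (illustrative): starting from a
   tree whose descriptor is just TREE_DESCR_64 -- one 8-byte value,
   held in tree[0] -- a request for byte 5 via
   pulldown_to_8(tree, 5, d) recurses through pulldown_to_16(tree, 4, ..)
   and pulldown_to_32(tree, 4, ..), performing:
      64   -> 32_1 | 32_0   with tree[4] = tree[0]
      32_1 -> 16_3 | 16_2   with tree[6] = tree[4]
      16_2 -> 8_5  | 8_4    with tree[5] = tree[4]
   so the final descriptor is 32_0 | 16_3 | 8_5 | 8_4, and the value
   for byte 5 is directly addressable at tree[5]. */
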
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

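/* Example (illustrative): pullup_descr_to_32(d, 0) on a descriptor
   with 8_1 | 8_0 | 16_1 set first folds 8_1 | 8_0 into 16_0 via
   pullup_descr_to_16, then folds 16_1 | 16_0 into 32_0.  Unlike the
   pulldowns, the pullups only rewrite the descriptor; they never copy
   values around within the tree. */
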
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) tables.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.
   verydead_thread_table_not_pruned lists the identity of the threads
   that died since the previous round of pruning.
   Once pruning is done, these ThrIDs are added to verydead_thread_table.
   We don't actually need to keep the set of threads that have ever
   died -- only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_tables_init ( void )
{
   tl_assert(!verydead_thread_table);
   tl_assert(!verydead_thread_table_not_pruned);
   verydead_thread_table
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.1",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   verydead_thread_table_not_pruned
     = VG_(newXA)( HG_(zalloc),
                   "libhb.verydead_thread_table_init.2",
                   HG_(free), sizeof(ThrID) );
   VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}

static void verydead_thread_table_sort_and_check (XArray* thrids)
{
   UWord i;

   VG_(sortXA)( thrids );
   /* Sanity check: check for unique .sts.thr values. */
   UWord nBT = VG_(sizeXA)( thrids );
   if (nBT > 0) {
      ThrID thrid1, thrid2;
      thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
      for (i = 1; i < nBT; i++) {
         thrid1 = thrid2;
         thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
         tl_assert(thrid1 < thrid2);
      }
   }
   /* Ok, so the dead thread table thrids has unique and in-order keys. */
}

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

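/* Example (illustrative): a VTS with usedTS == 2 and
   ts == [{thrid 1024, tym 3}, {thrid 1026, tym 1}] denotes the vector
   clock in which thread 1024 is at time 3, thread 1026 is at time 1,
   and all other threads are implicitly at time zero.  (Valid ThrIDs
   start at 1024 -- see the assertion in VTS__cmpLEQ below.)  Entries
   are kept sorted by .thrid, with no zero timestamps stored; that is
   exactly what is_sane_VTS below checks. */
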
/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thrids' removed.  The new
   array is sized exactly to hold the number of required elements.
   'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS. */
static void VTS__show ( const VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord    i, n;
   ScalarTS *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}

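/* Example (illustrative): ticking thread t in the VTS [a:2 t:5 z:1]
   (with a < t < z) gives [a:2 t:6 z:1], which is case (c) above.
   Ticking a thread u not yet mentioned, with t < u < z, gives
   [a:2 t:5 u:1 z:1], which is case (b). */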

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
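
/* Example (illustrative): joining [t1:4 t3:7] with [t1:6 t2:2] yields
   [t1:6 t2:2 t3:7] -- the per-thread maximum over the union of the
   threads mentioned.  Absent entries count as zero, and zero maxima
   are not stored, preserving the is_sane_VTS invariants. */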

/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
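
/* Example (illustrative): cmpLEQ([t1:2 t2:5], [t1:3 t2:5]) returns 0,
   since every entry on the left is <= its counterpart on the right.
   cmpLEQ([t1:4], [t1:3 t2:9]) returns t1, the first (and here the
   only) ThrID at which the left argument exceeds the right. */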

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)  This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across
      likely differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
   Word i, n;
   tl_assert(vts);

   VG_(printf)("[");
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      const ScalarTS *st = &vts->ts[i];
      VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
   }
   VG_(printf)("]");
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ: tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that.). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
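
/* Typical use (see vts_tab__find__or__clone_and_add below): build a
   candidate VTS in scratch storage, call the above to obtain the
   canonical interned copy, and then reuse or discard the scratch
   copy -- the set never takes ownership of 'cand' itself. */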

/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
      VtsID remap;    /* used only during pruning */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;
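
/* Example state (illustrative): if vts_tab holds entries 0..3 of
   which 1 and 3 are free, we might have vts_tab_freelist == 3,
   entry 3's .freelink == 1, and entry 1's .freelink == VtsID_INVALID:
   the free list is a simple LIFO chain threaded through the table. */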

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                         HG_(free), sizeof(VtsTE) );
   vts_tab_freelist = VtsID_INVALID;
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->freelink == VtsID_INVALID);
   ie->freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.freelink = VtsID_INVALID;
   te.remap    = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}
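
/* A VtsID fresh from get_new_VtsID() refers to an out-of-use slot
   (.vts == NULL, .rc == 0); the caller is expected to install a VTS
   in it and thereafter account for references via VtsID__rcinc /
   VtsID__rcdec below.  See vts_tab__find__or__clone_and_add for a
   call site. */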

/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}


static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}
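
/* Example (illustrative): if old entry 7 has .remap == 2, then
   remap_VtsID on a VtsID of 7 decrements old_tab[7].rc, increments
   new_tab[2].rc, and rewrites the VtsID in place to 2.
   remap_VtsIDs_in_SVal applies that rewrite to both the rMin and wMin
   fields of a constraint ('C') shadow value, and leaves other SVal
   kinds untouched. */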
2743
2744
sewardjf98e1c02008-10-25 16:22:41 +00002745/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002746__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002747static void vts_tab__do_GC ( Bool show_stats )
2748{
2749 UWord i, nTab, nLive, nFreed;
2750
sewardjffce8152011-06-24 10:09:41 +00002751 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002752 /* check this is actually necessary. */
2753 tl_assert(vts_tab_freelist == VtsID_INVALID);
2754
2755 /* empty the caches for partial order checks and binary joins. We
2756 could do better and prune out the entries to be deleted, but it
2757 ain't worth the hassle. */
2758 VtsID__invalidate_caches();
2759
2760 /* First, make the reference counts up to date. */
2761 zsm_flush_cache();
2762
2763 nTab = VG_(sizeXA)( vts_tab );
2764
2765 if (show_stats) {
2766 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2767 show_vts_stats("before GC");
2768 }
2769
sewardjffce8152011-06-24 10:09:41 +00002770 /* Now we can inspect the entire vts_tab. Any entries with zero
2771 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002772 free list, removed from vts_set, and deleted. */
2773 nFreed = 0;
2774 for (i = 0; i < nTab; i++) {
2775 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002776 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002777 VtsTE* te = VG_(indexXA)( vts_tab, i );
2778 if (te->vts == NULL) {
2779 tl_assert(te->rc == 0);
2780 continue; /* already on the free list (presumably) */
2781 }
2782 if (te->rc > 0)
2783 continue; /* in use */
2784 /* Ok, we got one we can free. */
2785 tl_assert(te->vts->id == i);
2786 /* first, remove it from vts_set. */
2787 present = VG_(delFromFM)( vts_set,
2788 &oldK, &oldV, (UWord)te->vts );
2789 tl_assert(present); /* else it isn't in vts_set ?! */
2790 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2791 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2792 /* now free the VTS itself */
2793 VTS__delete(te->vts);
2794 te->vts = NULL;
2795 /* and finally put this entry on the free list */
2796 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2797 add_to_free_list( i );
2798 nFreed++;
2799 }
2800
2801 /* Now figure out when the next GC should be. We'll allow the
2802 number of VTSs to double before GCing again. Except of course
2803 that since we can't (or, at least, don't) shrink vts_tab, we
2804 can't set the threshhold value smaller than it. */
2805 tl_assert(nFreed <= nTab);
2806 nLive = nTab - nFreed;
2807 tl_assert(nLive >= 0 && nLive <= nTab);
2808 vts_next_GC_at = 2 * nLive;
2809 if (vts_next_GC_at < nTab)
2810 vts_next_GC_at = nTab;
2811
2812 if (show_stats) {
2813 show_vts_stats("after GC");
2814 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2815 }
2816
sewardj5e2ac3b2009-08-11 10:39:25 +00002817 if (VG_(clo_stats)) {
sewardjffce8152011-06-24 10:09:41 +00002818 static UInt ctr = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002819 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002820 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002821 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002822 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002823 }
sewardjffce8152011-06-24 10:09:41 +00002824 /* ---------- END VTS GC ---------- */
2825
2826 /* Decide whether to do VTS pruning. We have one of three
2827 settings. */
2828 static UInt pruning_auto_ctr = 0; /* do not make non-static */
2829
2830 Bool do_pruning = False;
2831 switch (HG_(clo_vts_pruning)) {
2832 case 0: /* never */
2833 break;
2834 case 1: /* auto */
2835 do_pruning = (++pruning_auto_ctr % 5) == 0;
2836 break;
2837 case 2: /* always */
2838 do_pruning = True;
2839 break;
2840 default:
2841 tl_assert(0);
2842 }
2843
2844 /* The rest of this routine only handles pruning, so we can
2845 quit at this point if it is not to be done. */
2846 if (!do_pruning)
2847 return;
philippec3508652015-03-28 12:01:58 +00002848 /* No need to do pruning if no thread died since the last pruning as
2849 no VtsTE can be pruned. */
2850 if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
2851 return;
sewardjffce8152011-06-24 10:09:41 +00002852
2853 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00002854 /* Sort and check the very dead threads that died since the last pruning.
2855 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00002856 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00002857 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002858
2859 /* We will run through the old table, and create a new table and
2860 set, at the same time setting the .remap entries in the old
2861 table to point to the new entries. Then, visit every VtsID in
2862 the system, and replace all of them with new ones, using the
2863 .remap entries in the old table. Finally, we can delete the old
2864 table and set. */
2865
2866 XArray* /* of VtsTE */ new_tab
2867 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2868 HG_(free), sizeof(VtsTE) );
2869
2870 /* WordFM VTS* void */
2871 WordFM* new_set
2872 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2873 HG_(free),
2874 (Word(*)(UWord,UWord))VTS__cmp_structural );
2875
2876 /* Visit each old VTS. For each one:
2877
2878 * make a pruned version
2879
2880 * search new_set for the pruned version, yielding either
2881 Nothing (not present) or the new VtsID for it.
2882
2883 * if not present, allocate a new VtsID for it, insert (pruned
2884 VTS, new VtsID) in the tree, and set
2885 remap_table[old VtsID] = new VtsID.
2886
2887 * if present, set remap_table[old VtsID] = new VtsID, where
2888 new VtsID was determined by the tree lookup. Then free up
2889 the clone.
2890 */
2891
2892 UWord nBeforePruning = 0, nAfterPruning = 0;
2893 UWord nSTSsBefore = 0, nSTSsAfter = 0;
2894 VtsID new_VtsID_ctr = 0;
2895
2896 for (i = 0; i < nTab; i++) {
2897
2898 /* For each old VTS .. */
2899 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
2900 VTS* old_vts = old_te->vts;
2901 tl_assert(old_te->remap == VtsID_INVALID);
2902
2903 /* Skip it if not in use */
2904 if (old_te->rc == 0) {
2905 tl_assert(old_vts == NULL);
2906 continue;
2907 }
2908 tl_assert(old_vts != NULL);
2909 tl_assert(old_vts->id == i);
2910 tl_assert(old_vts->ts != NULL);
2911
2912 /* It is in use. Make a pruned version. */
2913 nBeforePruning++;
2914 nSTSsBefore += old_vts->usedTS;
2915 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00002916 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002917 tl_assert(new_vts->sizeTS == new_vts->usedTS);
2918 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2919 == 0x0ddC0ffeeBadF00dULL);
2920
2921 /* Get rid of the old VTS and the tree entry. It's a bit more
2922 complex to incrementally delete the VTSs now than to nuke
2923 them all after we're done, but the upside is that we don't
2924 wind up temporarily storing potentially two complete copies
2925 of each VTS and hence spiking memory use. */
2926 UWord oldK = 0, oldV = 12345;
2927 Bool present = VG_(delFromFM)( vts_set,
2928 &oldK, &oldV, (UWord)old_vts );
2929 tl_assert(present); /* else it isn't in vts_set ?! */
2930 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2931 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2932 /* now free the VTS itself */
2933 VTS__delete(old_vts);
2934 old_te->vts = NULL;
2935 old_vts = NULL;
2936
2937 /* NO MENTIONS of old_vts allowed beyond this point. */
2938
2939 /* Ok, we have the pruned copy in new_vts. See if a
2940 structurally identical version is already present in new_set.
2941 If so, delete the one we just made and move on; if not, add
2942 it. */
2943 VTS* identical_version = NULL;
2944 UWord valW = 12345;
2945 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2946 (UWord)new_vts)) {
2947 // already have it
2948 tl_assert(valW == 0);
2949 tl_assert(identical_version != NULL);
2950 tl_assert(identical_version != new_vts);
2951 VTS__delete(new_vts);
2952 new_vts = identical_version;
2953 tl_assert(new_vts->id != VtsID_INVALID);
2954 } else {
2955 tl_assert(valW == 12345);
2956 tl_assert(identical_version == NULL);
2957 new_vts->id = new_VtsID_ctr++;
2958 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2959 tl_assert(!b);
2960 VtsTE new_te;
2961 new_te.vts = new_vts;
2962 new_te.rc = 0;
2963 new_te.freelink = VtsID_INVALID;
2964 new_te.remap = VtsID_INVALID;
2965 Word j = VG_(addToXA)( new_tab, &new_te );
2966 tl_assert(j <= i);
2967 tl_assert(j == new_VtsID_ctr - 1);
2968 // stats
2969 nAfterPruning++;
2970 nSTSsAfter += new_vts->usedTS;
2971 }
2972 old_te->remap = new_vts->id;
2973
2974 } /* for (i = 0; i < nTab; i++) */
2975
philippec3508652015-03-28 12:01:58 +00002976 /* Move very dead thread from verydead_thread_table_not_pruned to
2977 verydead_thread_table. Sort and check verydead_thread_table
2978 to verify a thread was reported very dead only once. */
2979 {
2980 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
2981
2982 for (i = 0; i < nBT; i++) {
2983 ThrID thrid =
2984 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
2985 VG_(addToXA)( verydead_thread_table, &thrid );
2986 }
2987 verydead_thread_table_sort_and_check (verydead_thread_table);
2988 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
2989 }
2990
sewardjffce8152011-06-24 10:09:41 +00002991 /* At this point, we have:
2992 * the old VTS table, with its .remap entries set,
2993 and with all .vts == NULL.
2994 * the old VTS tree should be empty, since it and the old VTSs
2995 it contained have been incrementally deleted was we worked
2996 through the old table.
2997 * the new VTS table, with all .rc == 0, all .freelink and .remap
2998 == VtsID_INVALID.
2999 * the new VTS tree.
3000 */
3001 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3002
3003 /* Now actually apply the mapping. */
3004 /* Visit all the VtsIDs in the entire system. Where do we expect
3005 to find them?
3006 (a) in shadow memory -- the LineZs and LineFs
3007 (b) in our collection of struct _Thrs.
3008 (c) in our collection of struct _SOs.
3009 Nowhere else, AFAICS. Not in the zsm cache, because that just
3010 got invalidated.
3011
3012 Using the .remap fields in vts_tab, map each old VtsID to a new
3013 VtsID. For each old VtsID, dec its rc; and for each new one,
3014 inc it. This sets up the new refcounts, and it also gives a
3015 cheap sanity check of the old ones: all old refcounts should be
3016 zero after this operation.
3017 */
3018
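 /* For reference, the per-reference remapping step described above
 amounts to the following (an illustrative sketch; the real
 remap_VtsID, defined elsewhere in this file, may differ in
 detail):

 static void remap_VtsID_sketch ( XArray* old_tab,
 XArray* new_tab, VtsID* ii )
 {
 VtsTE* old_te = VG_(indexXA)( old_tab, *ii );
 old_te->rc--; /* old rc drains towards zero */
 VtsTE* new_te = VG_(indexXA)( new_tab, old_te->remap );
 new_te->rc++; /* new rc accumulates */
 *ii = old_te->remap; /* rewrite the VtsID in place */
 }
 */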
3019 /* Do the mappings for (a) above: iterate over the Primary shadow
3020 mem map (WordFM Addr SecMap*). */
3021 UWord secmapW = 0;
3022 VG_(initIterFM)( map_shmem );
3023 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3024 UWord j;
3025 SecMap* sm = (SecMap*)secmapW;
3026 tl_assert(sm->magic == SecMap_MAGIC);
3027 /* Deal with the LineZs */
3028 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3029 LineZ* lineZ = &sm->linesZ[i];
3030 if (lineZ->dict[0] == SVal_INVALID)
3031 continue; /* not in use -- data is in F rep instead */
3032 for (j = 0; j < 4; j++)
3033 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3034 }
3035 /* Deal with the LineFs */
3036 for (i = 0; i < sm->linesF_size; i++) {
3037 LineF* lineF = &sm->linesF[i];
3038 if (!lineF->inUse)
3039 continue;
3040 for (j = 0; j < N_LINE_ARANGE; j++)
3041 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3042 }
3043 }
3044 VG_(doneIterFM)( map_shmem );
3045
3046 /* Do the mappings for (b) above: visit our collection of struct
3047 _Thrs. */
3048 Thread* hgthread = get_admin_threads();
3049 tl_assert(hgthread);
3050 while (hgthread) {
3051 Thr* hbthr = hgthread->hbthr;
3052 tl_assert(hbthr);
3053 /* Threads that are listed in the prunable set have their viR
3054 and viW set to VtsID_INVALID, so we can't mess with them. */
3055 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3056 tl_assert(hbthr->viR == VtsID_INVALID);
3057 tl_assert(hbthr->viW == VtsID_INVALID);
3058 hgthread = hgthread->admin;
3059 continue;
3060 }
3061 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3062 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3063 hgthread = hgthread->admin;
3064 }
3065
3066 /* Do the mappings for (c) above: visit the struct _SOs. */
3067 SO* so = admin_SO;
3068 while (so) {
3069 if (so->viR != VtsID_INVALID)
3070 remap_VtsID( vts_tab, new_tab, &so->viR );
3071 if (so->viW != VtsID_INVALID)
3072 remap_VtsID( vts_tab, new_tab, &so->viW );
3073 so = so->admin_next;
3074 }
3075
3076 /* So, we're nearly done (with this incredibly complex operation).
3077 Check the refcounts for the old VtsIDs all fell to zero, as
3078 expected. Any failure is serious. */
3079 for (i = 0; i < nTab; i++) {
3080 VtsTE* te = VG_(indexXA)( vts_tab, i );
3081 tl_assert(te->vts == NULL);
3082 /* This is the assert proper. Note we're also asserting
3083 zeroness for old entries which are unmapped (hence have
3084 .remap == VtsID_INVALID). That's OK. */
3085 tl_assert(te->rc == 0);
3086 }
3087
3088 /* Install the new table and set. */
3089 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3090 vts_set = new_set;
3091 VG_(deleteXA)( vts_tab );
3092 vts_tab = new_tab;
3093
3094 /* The freelist of vts_tab entries is empty now, because we've
3095 compacted all of the live entries at the low end of the
3096 table. */
3097 vts_tab_freelist = VtsID_INVALID;
3098
3099 /* Sanity check vts_set and vts_tab. */
3100
3101 /* Because all the live entries got slid down to the bottom of vts_tab: */
3102 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3103
3104 /* Assert that the vts_tab and vts_set entries point at each other
3105 in the required way */
3106 UWord wordK = 0, wordV = 0;
3107 VG_(initIterFM)( vts_set );
3108 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3109 tl_assert(wordK != 0);
3110 tl_assert(wordV == 0);
3111 VTS* vts = (VTS*)wordK;
3112 tl_assert(vts->id != VtsID_INVALID);
3113 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3114 tl_assert(te->vts == vts);
3115 }
3116 VG_(doneIterFM)( vts_set );
3117
3118 /* Also iterate over the table, and check each entry is
3119 plausible. */
3120 nTab = VG_(sizeXA)( vts_tab );
3121 for (i = 0; i < nTab; i++) {
3122 VtsTE* te = VG_(indexXA)( vts_tab, i );
3123 tl_assert(te->vts);
3124 tl_assert(te->vts->id == i);
3125 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3126 tl_assert(te->freelink == VtsID_INVALID); /* in use */
3127 tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3128 }
3129
3130 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3131 if (VG_(clo_stats)) {
3132 static UInt ctr = 1;
3133 tl_assert(nTab > 0);
3134 VG_(message)(
3135 Vg_DebugMsg,
3136 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3137 "after %lu (avg sz %lu)\n",
3138 ctr++,
3139 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3140 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3141 );
3142 }
sewardjffce8152011-06-24 10:09:41 +00003143 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003144}
3145
3146
3147/////////////////////////////////////////////////////////
3148// //
3149// Vts IDs //
3150// //
3151/////////////////////////////////////////////////////////
3152
3153//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003154/* A max-sized VTS which is used as a temporary (the first
3155 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3156static VTS* temp_max_sized_VTS = NULL;
3157
3158//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003159static ULong stats__cmpLEQ_queries = 0;
3160static ULong stats__cmpLEQ_misses = 0;
3161static ULong stats__join2_queries = 0;
3162static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003163
3164static inline UInt ROL32 ( UInt w, Int n ) {
3165 w = (w << n) | (w >> (32-n));
3166 return w;
3167}
3168static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3169 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3170 return hash % nTab;
3171}
3172
sewardj23f12002009-07-24 08:45:08 +00003173#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003174static
sewardj23f12002009-07-24 08:45:08 +00003175 struct { VtsID vi1; VtsID vi2; Bool leq; }
3176 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003177
3178#define N_JOIN2_CACHE 1023
3179static
3180 struct { VtsID vi1; VtsID vi2; VtsID res; }
3181 join2_cache[N_JOIN2_CACHE];
3182
3183static void VtsID__invalidate_caches ( void ) {
3184 Int i;
sewardj23f12002009-07-24 08:45:08 +00003185 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3186 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3187 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3188 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003189 }
3190 for (i = 0; i < N_JOIN2_CACHE; i++) {
3191 join2_cache[i].vi1 = VtsID_INVALID;
3192 join2_cache[i].vi2 = VtsID_INVALID;
3193 join2_cache[i].res = VtsID_INVALID;
3194 }
3195}
3196//////////////////////////
3197
sewardjd52392d2008-11-08 20:36:26 +00003198//static Bool VtsID__is_valid ( VtsID vi ) {
3199// VtsTE* ve;
3200// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3201// return False;
3202// ve = VG_(indexXA)( vts_tab, vi );
3203// if (!ve->vts)
3204// return False;
3205// tl_assert(ve->vts->id == vi);
3206// return True;
3207//}
sewardjf98e1c02008-10-25 16:22:41 +00003208
3209static VTS* VtsID__to_VTS ( VtsID vi ) {
3210 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3211 tl_assert(te->vts);
3212 return te->vts;
3213}
3214
3215static void VtsID__pp ( VtsID vi ) {
sewardjf98e1c02008-10-25 16:22:41 +00003216 VTS* vts = VtsID__to_VTS(vi);
florianb28fe892014-10-28 20:52:07 +00003217 VTS__show( vts );
sewardjf98e1c02008-10-25 16:22:41 +00003218}
3219
3220/* compute partial ordering relation of vi1 and vi2. */
3221__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003222static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003223 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003224 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003225 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003226 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003227 tl_assert(vi1 != vi2);
3228 ////++
sewardj23f12002009-07-24 08:45:08 +00003229 stats__cmpLEQ_queries++;
3230 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3231 if (cmpLEQ_cache[hash].vi1 == vi1
3232 && cmpLEQ_cache[hash].vi2 == vi2)
3233 return cmpLEQ_cache[hash].leq;
3234 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003235 ////--
3236 v1 = VtsID__to_VTS(vi1);
3237 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003238 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003239 ////++
sewardj23f12002009-07-24 08:45:08 +00003240 cmpLEQ_cache[hash].vi1 = vi1;
3241 cmpLEQ_cache[hash].vi2 = vi2;
3242 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003243 ////--
sewardj23f12002009-07-24 08:45:08 +00003244 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003245}
sewardj23f12002009-07-24 08:45:08 +00003246static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3247 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003248}
3249
3250/* compute binary join */
3251__attribute__((noinline))
3252static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3253 UInt hash;
3254 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003255 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003256 //if (vi1 == vi2) return vi1;
3257 tl_assert(vi1 != vi2);
3258 ////++
3259 stats__join2_queries++;
3260 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3261 if (join2_cache[hash].vi1 == vi1
3262 && join2_cache[hash].vi2 == vi2)
3263 return join2_cache[hash].res;
3264 stats__join2_misses++;
3265 ////--
3266 vts1 = VtsID__to_VTS(vi1);
3267 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003268 temp_max_sized_VTS->usedTS = 0;
3269 VTS__join(temp_max_sized_VTS, vts1,vts2);
3270 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003271 ////++
3272 join2_cache[hash].vi1 = vi1;
3273 join2_cache[hash].vi2 = vi2;
3274 join2_cache[hash].res = res;
3275 ////--
3276 return res;
3277}
3278static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003279 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003280}
3281
3282/* create a singleton VTS, namely [thr:1] */
3283static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003284 temp_max_sized_VTS->usedTS = 0;
3285 VTS__singleton(temp_max_sized_VTS, thr,tym);
3286 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003287}
3288
3289/* tick operation, creates value 1 if specified index is absent */
3290static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3291 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003292 temp_max_sized_VTS->usedTS = 0;
3293 VTS__tick(temp_max_sized_VTS, idx,vts);
3294 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003295}
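/* All three constructors above (join2, singleton, tick) share the
 same hash-consing pattern, sketched here for clarity:

 temp_max_sized_VTS->usedTS = 0; // reset the scratch VTS
 VTS__some_op( temp_max_sized_VTS, ... ); // build result in scratch
 return vts_tab__find__or__clone_and_add( temp_max_sized_VTS );

 vts_tab__find__or__clone_and_add (defined earlier in this file)
 either finds a structurally identical VTS already interned or clones
 the scratch copy in, so equal VTSs always get equal VtsIDs. */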
3296
3297/* index into a VTS (only for assertions) */
3298static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3299 VTS* vts = VtsID__to_VTS(vi);
3300 return VTS__indexAt_SLOW( vts, idx );
3301}
3302
sewardj23f12002009-07-24 08:45:08 +00003303/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3304 any, really) element in vi1 which is pointwise greater-than the
3305 corresponding element in vi2. If no such element exists, return
3306 NULL. This needs to be fairly quick since it is called every time
3307 a race is detected. */
3308static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3309{
3310 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003311 Thr* diffthr;
3312 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003313 tl_assert(vi1 != vi2);
3314 vts1 = VtsID__to_VTS(vi1);
3315 vts2 = VtsID__to_VTS(vi2);
3316 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003317 diffthrid = VTS__cmpLEQ(vts1, vts2);
3318 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003319 tl_assert(diffthr); /* else they are LEQ ! */
3320 return diffthr;
3321}
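/* Hypothetical caller sketch (not from this file), showing the
 intended use once a race has been detected, i.e. once
 VtsID__cmpLEQ(vi1, vi2) has just returned False:

 if (!VtsID__cmpLEQ(vi1, vi2)) {
 Thr* confThr = VtsID__findFirst_notLEQ(vi1, vi2);
 // confThr's component in vi1 exceeds its component in vi2,
 // which is the witness that the two accesses are unordered.
 }
*/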
3322
3323
3324/////////////////////////////////////////////////////////
3325// //
3326// Filters //
3327// //
3328/////////////////////////////////////////////////////////
3329
sewardj23f12002009-07-24 08:45:08 +00003330/* Forget everything we know -- clear the filter and let everything
3331 through. This needs to be as fast as possible, since it is called
3332 every time the running thread changes, and every time a thread's
3333 vector clocks change, which can be quite frequent. The obvious
3334 fast way to do this is simply to stuff in tags which we know are
3335 not going to match anything, since they're not aligned to the start
3336 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003337static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003338{
3339 UWord i;
3340 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3341 for (i = 0; i < FI_NUM_LINES; i += 8) {
3342 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3343 fi->tags[i+1] = 1;
3344 fi->tags[i+2] = 1;
3345 fi->tags[i+3] = 1;
3346 fi->tags[i+4] = 1;
3347 fi->tags[i+5] = 1;
3348 fi->tags[i+6] = 1;
3349 fi->tags[i+7] = 1;
3350 }
3351 tl_assert(i == FI_NUM_LINES);
3352}
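/* Why the value 1 can never match: tags are computed by FI_GET_TAG,
 which (as defined earlier in this file) masks an address down to the
 start of its filter line, so every genuine tag is FI_LINE_SZB-aligned
 and hence has its low bits clear. Illustrative check, under that
 assumption:

 tl_assert( (FI_GET_TAG(a) & (Addr)(FI_LINE_SZB-1)) == 0 );
 // ...whereas 1 & (FI_LINE_SZB-1) != 0, so 1 matches no real tag.
*/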
3353
3354/* Clear an arbitrary range in the filter. Unfortunately
3355 we have to do this due to core-supplied new/die-mem events. */
3356
3357static void Filter__clear_1byte ( Filter* fi, Addr a )
3358{
3359 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3360 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3361 FiLine* line = &fi->lines[lineno];
3362 UWord loff = (a - atag) / 8;
3363 UShort mask = 0x3 << (2 * (a & 7));
3364 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3365 if (LIKELY( fi->tags[lineno] == atag )) {
3366 /* hit. clear the bits. */
3367 UShort u16 = line->u16s[loff];
3368 line->u16s[loff] = u16 & ~mask; /* clear them */
3369 } else {
3370 /* miss. The filter doesn't hold this address, so ignore. */
3371 }
3372}
3373
3374static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3375{
3376 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3377 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3378 FiLine* line = &fi->lines[lineno];
3379 UWord loff = (a - atag) / 8;
3380 if (LIKELY( fi->tags[lineno] == atag )) {
3381 line->u16s[loff] = 0;
3382 } else {
3383 /* miss. The filter doesn't hold this address, so ignore. */
3384 }
3385}
3386
3387static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3388{
3389 //VG_(printf)("%lu ", len);
3390 /* slowly do part preceding 8-alignment */
3391 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3392 Filter__clear_1byte( fi, a );
3393 a++;
3394 len--;
3395 }
3396 /* vector loop */
3397 while (len >= 8) {
3398 Filter__clear_8bytes_aligned( fi, a );
3399 a += 8;
3400 len -= 8;
3401 }
3402 /* slowly do tail */
3403 while (UNLIKELY(len > 0)) {
3404 Filter__clear_1byte( fi, a );
3405 a++;
3406 len--;
3407 }
3408}
3409
3410
3411/* ------ Read handlers for the filter. ------ */
3412
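/* Layout reminder for the handlers below: each u16 in a FiLine covers
 one aligned 8-byte word, 2 bits per byte. For byte k of the word,
 bit 2k+1 is the R ("seen a read") bit and bit 2k is the W bit; a
 read sets only R bits, while a write sets both. Hence the masks used
 below, e.g. for an 8-byte access:

 0xAAAA // all eight R bits (read handler)
 0xFFFF // all eight R and W bits (write handler)

 and for a 1-byte access at address a:

 0x2 << (2 * (a & 7)) // the single R bit for that byte
 0x3 << (2 * (a & 7)) // its R and W bits together
*/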
3413static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3414{
3415 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3416 return False;
3417 {
3418 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3419 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3420 FiLine* line = &fi->lines[lineno];
3421 UWord loff = (a - atag) / 8;
3422 UShort mask = 0xAAAA;
3423 if (LIKELY( fi->tags[lineno] == atag )) {
3424 /* hit. check line and update. */
3425 UShort u16 = line->u16s[loff];
3426 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3427 line->u16s[loff] = u16 | mask; /* set them */
3428 return ok;
3429 } else {
3430 /* miss. nuke existing line and re-use it. */
3431 UWord i;
3432 fi->tags[lineno] = atag;
3433 for (i = 0; i < FI_LINE_SZB / 8; i++)
3434 line->u16s[i] = 0;
3435 line->u16s[loff] = mask;
3436 return False;
3437 }
3438 }
3439}
3440
3441static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3442{
3443 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3444 return False;
3445 {
3446 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3447 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3448 FiLine* line = &fi->lines[lineno];
3449 UWord loff = (a - atag) / 8;
3450 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3451 if (LIKELY( fi->tags[lineno] == atag )) {
3452 /* hit. check line and update. */
3453 UShort u16 = line->u16s[loff];
3454 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3455 line->u16s[loff] = u16 | mask; /* set them */
3456 return ok;
3457 } else {
3458 /* miss. nuke existing line and re-use it. */
3459 UWord i;
3460 fi->tags[lineno] = atag;
3461 for (i = 0; i < FI_LINE_SZB / 8; i++)
3462 line->u16s[i] = 0;
3463 line->u16s[loff] = mask;
3464 return False;
3465 }
3466 }
3467}
3468
3469static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3470{
3471 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3472 return False;
3473 {
3474 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3475 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3476 FiLine* line = &fi->lines[lineno];
3477 UWord loff = (a - atag) / 8;
3478 UShort mask = 0xA << (2 * (a & 6));
3479 /* mask is A000, 0A00, 00A0 or 000A */
3480 if (LIKELY( fi->tags[lineno] == atag )) {
3481 /* hit. check line and update. */
3482 UShort u16 = line->u16s[loff];
3483 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3484 line->u16s[loff] = u16 | mask; /* set them */
3485 return ok;
3486 } else {
3487 /* miss. nuke existing line and re-use it. */
3488 UWord i;
3489 fi->tags[lineno] = atag;
3490 for (i = 0; i < FI_LINE_SZB / 8; i++)
3491 line->u16s[i] = 0;
3492 line->u16s[loff] = mask;
3493 return False;
3494 }
3495 }
3496}
3497
3498static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3499{
3500 {
3501 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3502 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3503 FiLine* line = &fi->lines[lineno];
3504 UWord loff = (a - atag) / 8;
3505 UShort mask = 0x2 << (2 * (a & 7));
3506 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3507 if (LIKELY( fi->tags[lineno] == atag )) {
3508 /* hit. check line and update. */
3509 UShort u16 = line->u16s[loff];
3510 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3511 line->u16s[loff] = u16 | mask; /* set them */
3512 return ok;
3513 } else {
3514 /* miss. nuke existing line and re-use it. */
3515 UWord i;
3516 fi->tags[lineno] = atag;
3517 for (i = 0; i < FI_LINE_SZB / 8; i++)
3518 line->u16s[i] = 0;
3519 line->u16s[loff] = mask;
3520 return False;
3521 }
3522 }
3523}
3524
3525
3526/* ------ Write handlers for the filter. ------ */
3527
3528static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3529{
3530 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3531 return False;
3532 {
3533 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3534 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3535 FiLine* line = &fi->lines[lineno];
3536 UWord loff = (a - atag) / 8;
3537 UShort mask = 0xFFFF;
3538 if (LIKELY( fi->tags[lineno] == atag )) {
3539 /* hit. check line and update. */
3540 UShort u16 = line->u16s[loff];
3541 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3542 line->u16s[loff] = u16 | mask; /* set them */
3543 return ok;
3544 } else {
3545 /* miss. nuke existing line and re-use it. */
3546 UWord i;
3547 fi->tags[lineno] = atag;
3548 for (i = 0; i < FI_LINE_SZB / 8; i++)
3549 line->u16s[i] = 0;
3550 line->u16s[loff] = mask;
3551 return False;
3552 }
3553 }
3554}
3555
3556static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3557{
3558 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3559 return False;
3560 {
3561 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3562 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3563 FiLine* line = &fi->lines[lineno];
3564 UWord loff = (a - atag) / 8;
3565 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3566 if (LIKELY( fi->tags[lineno] == atag )) {
3567 /* hit. check line and update. */
3568 UShort u16 = line->u16s[loff];
3569 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3570 line->u16s[loff] = u16 | mask; /* set them */
3571 return ok;
3572 } else {
3573 /* miss. nuke existing line and re-use it. */
3574 UWord i;
3575 fi->tags[lineno] = atag;
3576 for (i = 0; i < FI_LINE_SZB / 8; i++)
3577 line->u16s[i] = 0;
3578 line->u16s[loff] = mask;
3579 return False;
3580 }
3581 }
3582}
3583
3584static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3585{
3586 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3587 return False;
3588 {
3589 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3590 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3591 FiLine* line = &fi->lines[lineno];
3592 UWord loff = (a - atag) / 8;
3593 UShort mask = 0xF << (2 * (a & 6));
3594 /* mask is F000, 0F00, 00F0 or 000F */
3595 if (LIKELY( fi->tags[lineno] == atag )) {
3596 /* hit. check line and update. */
3597 UShort u16 = line->u16s[loff];
3598 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3599 line->u16s[loff] = u16 | mask; /* set them */
3600 return ok;
3601 } else {
3602 /* miss. nuke existing line and re-use it. */
3603 UWord i;
3604 fi->tags[lineno] = atag;
3605 for (i = 0; i < FI_LINE_SZB / 8; i++)
3606 line->u16s[i] = 0;
3607 line->u16s[loff] = mask;
3608 return False;
3609 }
3610 }
3611}
3612
3613static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3614{
3615 {
3616 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3617 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3618 FiLine* line = &fi->lines[lineno];
3619 UWord loff = (a - atag) / 8;
3620 UShort mask = 0x3 << (2 * (a & 7));
3621 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3622 if (LIKELY( fi->tags[lineno] == atag )) {
3623 /* hit. check line and update. */
3624 UShort u16 = line->u16s[loff];
3625 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3626 line->u16s[loff] = u16 | mask; /* set them */
3627 return ok;
3628 } else {
3629 /* miss. nuke existing line and re-use it. */
3630 UWord i;
3631 fi->tags[lineno] = atag;
3632 for (i = 0; i < FI_LINE_SZB / 8; i++)
3633 line->u16s[i] = 0;
3634 line->u16s[loff] = mask;
3635 return False;
3636 }
3637 }
3638}
3639
sewardjf98e1c02008-10-25 16:22:41 +00003640
3641/////////////////////////////////////////////////////////
3642// //
3643// Threads //
3644// //
3645/////////////////////////////////////////////////////////
3646
sewardje4cce742011-02-24 15:25:24 +00003647/* Maps ThrID values to their Thr*s (which contain ThrID values that
3648 should point back to the relevant slot in the array). Lowest
3649 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3650static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3651
3652/* And a counter to dole out ThrID values. For rationale/background,
3653 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003654static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003655
3656static ThrID Thr__to_ThrID ( Thr* thr ) {
3657 return thr->thrid;
3658}
3659static Thr* Thr__from_ThrID ( UInt thrid ) {
3660 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3661 tl_assert(thr->thrid == thrid);
3662 return thr;
3663}
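/* Worked example of the slot arithmetic: the first thread allocated
 gets thrid 1024 and occupies slot 0 of thrid_to_thr_map, so in
 general slot == thrid - 1024:

 Thr* t = Thr__from_ThrID(1024);
 tl_assert(t->thrid == 1024); /* stored in slot 0 */

 Thr__new below asserts the same invariant (ix + 1024 == thr->thrid)
 each time it adds a new binding. */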
3664
3665static Thr* Thr__new ( void )
3666{
sewardjf98e1c02008-10-25 16:22:41 +00003667 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3668 thr->viR = VtsID_INVALID;
3669 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003670 thr->llexit_done = False;
3671 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00003672 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
philippeca903bb2014-04-26 22:50:08 +00003673 if (HG_(clo_history_level) == 1)
3674 thr->local_Kws_n_stacks
3675 = VG_(newXA)( HG_(zalloc),
3676 "libhb.Thr__new.3 (local_Kws_and_stacks)",
3677 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00003678
3679 /* Add this Thr* <-> ThrID binding to the mapping, and
3680 cross-check */
3681 if (!thrid_to_thr_map) {
3682 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3683 HG_(free), sizeof(Thr*) );
sewardje4cce742011-02-24 15:25:24 +00003684 }
3685
sewardj7aa38a92011-02-27 23:04:12 +00003686 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00003687 /* We're hosed. We have to stop. */
3688 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3689 }
3690
3691 thr->thrid = thrid_counter++;
3692 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3693 tl_assert(ix + 1024 == thr->thrid);
3694
sewardjf98e1c02008-10-25 16:22:41 +00003695 return thr;
3696}
3697
sewardj8ab2c132009-08-02 09:34:35 +00003698static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00003699{
3700 Word nPresent;
3701 ULong_n_EC pair;
3702 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00003703
3704 // We only collect this info at history level 1 (approx)
3705 if (HG_(clo_history_level) != 1)
3706 return;
3707
sewardj8ab2c132009-08-02 09:34:35 +00003708 /* This is the scalar Kw for thr. */
3709 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00003710 pair.ec = main_get_EC( thr );
3711 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00003712 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00003713
3714 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00003715 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00003716
3717 /* Throw away old stacks, if necessary. We can't accumulate stuff
3718 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00003719 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3720 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3721 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3722 if (0)
3723 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00003724 thr, pair.ull, pair.ec );
3725 }
3726
3727 if (nPresent > 0) {
3728 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00003729 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3730 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00003731 }
3732
3733 if (nPresent == 0)
3734 pair.ec = NULL;
3735
sewardj8ab2c132009-08-02 09:34:35 +00003736 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00003737
3738 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00003739 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00003740 thr, pair.ull, pair.ec );
3741 if (0)
3742 VG_(pp_ExeContext)(pair.ec);
3743}
3744
florian6bd9dc12012-11-23 16:17:43 +00003745static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
3746 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00003747{
3748 if (pair1->ull < pair2->ull) return -1;
3749 if (pair1->ull > pair2->ull) return 1;
3750 return 0;
3751}
3752
sewardjf98e1c02008-10-25 16:22:41 +00003753
3754/////////////////////////////////////////////////////////
3755// //
3756// Shadow Values //
3757// //
3758/////////////////////////////////////////////////////////
3759
3760// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3761// hb_zsm.h. We have to do everything else here.
3762
3763/* SVal is 64 bit unsigned int.
3764
3765 <---------30---------> <---------30--------->
3766 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003767 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003768 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
3769
sewardjf98e1c02008-10-25 16:22:41 +00003770*/
3771#define SVAL_TAGMASK (3ULL << 62)
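/* Worked example of the tag scheme: SVal__mkC packs two VtsIDs as

 SVal s = (((ULong)rmini) << 32) | (ULong)wmini;

 and since valid VtsIDs (30 significant bits, per the diagram above)
 have their top two bits clear, (s & SVAL_TAGMASK) == 0 for any
 C-value -- which is exactly the test SVal__isC makes below.
 SVal_NOACCESS instead carries tag bits 10 at the top. */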
3772
3773static inline Bool SVal__isC ( SVal s ) {
3774 return (0ULL << 62) == (s & SVAL_TAGMASK);
3775}
3776static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3777 //tl_assert(VtsID__is_valid(rmini));
3778 //tl_assert(VtsID__is_valid(wmini));
3779 return (((ULong)rmini) << 32) | ((ULong)wmini);
3780}
3781static inline VtsID SVal__unC_Rmin ( SVal s ) {
3782 tl_assert(SVal__isC(s));
3783 return (VtsID)(s >> 32);
3784}
3785static inline VtsID SVal__unC_Wmin ( SVal s ) {
3786 tl_assert(SVal__isC(s));
3787 return (VtsID)(s & 0xFFFFFFFFULL);
3788}
3789
sewardj23f12002009-07-24 08:45:08 +00003790static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003791 return (2ULL << 62) == (s & SVAL_TAGMASK);
3792}
sewardj5aa09bf2014-06-20 14:25:53 +00003793__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00003794static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003795 return 2ULL << 62;
3796}
3797
3798/* Direct callback from lib_zsm. */
3799static void SVal__rcinc ( SVal s ) {
3800 if (SVal__isC(s)) {
3801 VtsID__rcinc( SVal__unC_Rmin(s) );
3802 VtsID__rcinc( SVal__unC_Wmin(s) );
3803 }
3804}
3805
3806/* Direct callback from lib_zsm. */
3807static void SVal__rcdec ( SVal s ) {
3808 if (SVal__isC(s)) {
3809 VtsID__rcdec( SVal__unC_Rmin(s) );
3810 VtsID__rcdec( SVal__unC_Wmin(s) );
3811 }
3812}
3813
3814
3815/////////////////////////////////////////////////////////
3816// //
3817// Change-event map2 //
3818// //
3819/////////////////////////////////////////////////////////
3820
sewardjf98e1c02008-10-25 16:22:41 +00003821#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3822
3823/* This is in two parts:
3824
sewardj23f12002009-07-24 08:45:08 +00003825 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003826 traces. When the reference count of a stack trace becomes zero,
3827 it is removed from the set and freed up. The intent is to have
3828 a set of stack traces which can be referred to from (2), but to
3829 only represent each one once. The set is indexed/searched by
3830 ordering on the stack trace vectors.
3831
sewardj849b0ed2008-12-21 10:43:10 +00003832 2. A SparseWA of OldRefs. These store information about each old
3833 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003834 location for which the information is recorded. For LRU
3835 purposes, each OldRef also contains a generation number,
3836 indicating when it was most recently accessed.
3837
3838 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003839 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3840 size) triples to RCECs. This allows us to collect the last
3841 access-traceback by up to N_OLDREF_ACCS different triples for
3842 this location. The accs[] array is a MTF-array. If a binding
3843 falls off the end, that's too bad -- we will lose info about
3844 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003845
sewardj849b0ed2008-12-21 10:43:10 +00003846 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003847 whose generation numbers are below some threshold; hence doing
3848 approximate LRU discarding. For each discarded OldRef we must
3849 of course decrement the reference count on the all RCECs it
3850 refers to, in order that entries from (1) eventually get
3851 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003852
3853 A major improvement in reliability of this mechanism would be to
3854 have a dynamically sized OldRef.accs[] array, so no entries ever
3855 fall off the end. In investigations (Dec 08) it appears that a
3856 major cause for the non-availability of conflicting-access traces
3857 in race reports is caused by the fixed size of this array. I
3858 suspect for most OldRefs, only a few entries are used, but for a
3859 minority of cases there is an overflow, leading to info lossage.
3860 Investigations also suggest this is very workload and scheduling
3861 sensitive. Therefore a dynamic sizing would be better.
3862
philippe6643e962012-01-17 21:16:30 +00003863 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00003864 for OldRef structures. And that's important for performance. So
3865 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003866*/
3867
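/* A compressed sketch of the two-part shape just described (field
 names as defined later in this section):

 contextTab[hash] -> RCEC -> RCEC -> ... // part (1), refcounted
 ^
 | accs[i].rcec
 oldrefTree: Addr -> OldRef { gen, // part (2), LRU by gen
 accs[N_OLDREF_ACCS] }
 each slot: (thrid, isW, szLg2B, locksHeldW)

 event_map_bind() below installs/updates accs[] slots, and
 event_map_GC() discards old-generation OldRefs, which in turn
 drains the RCEC reference counts. */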
3868
3869static UWord stats__ctxt_rcdec1 = 0;
3870static UWord stats__ctxt_rcdec2 = 0;
3871static UWord stats__ctxt_rcdec3 = 0;
3872static UWord stats__ctxt_rcdec_calls = 0;
3873static UWord stats__ctxt_rcdec_discards = 0;
3874static UWord stats__ctxt_rcdec1_eq = 0;
3875
3876static UWord stats__ctxt_tab_curr = 0;
3877static UWord stats__ctxt_tab_max = 0;
3878
3879static UWord stats__ctxt_tab_qs = 0;
3880static UWord stats__ctxt_tab_cmps = 0;
3881
3882
3883///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00003884//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00003885///
3886
3887#define N_FRAMES 8
3888
3889// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3890#define RCEC_MAGIC 0xab88abb2UL
3891
3892//#define N_RCEC_TAB 98317 /* prime */
3893#define N_RCEC_TAB 196613 /* prime */
3894
3895typedef
3896 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003897 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003898 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003899 UWord rc;
3900 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003901 UWord frames_hash; /* hash of all the frames */
3902 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003903 }
3904 RCEC;
3905
3906static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3907
3908
3909/* Gives an arbitrary total order on RCEC .frames fields */
3910static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3911 Word i;
3912 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3913 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003914 if (ec1->frames_hash < ec2->frames_hash) return -1;
3915 if (ec1->frames_hash > ec2->frames_hash) return 1;
3916 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003917 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003918 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003919 }
3920 return 0;
3921}
3922
3923
3924/* Dec the ref of this RCEC. */
3925static void ctxt__rcdec ( RCEC* ec )
3926{
3927 stats__ctxt_rcdec_calls++;
3928 tl_assert(ec && ec->magic == RCEC_MAGIC);
3929 tl_assert(ec->rc > 0);
3930 ec->rc--;
3931}
3932
3933static void ctxt__rcinc ( RCEC* ec )
3934{
3935 tl_assert(ec && ec->magic == RCEC_MAGIC);
3936 ec->rc++;
3937}
3938
3939
philippe6643e962012-01-17 21:16:30 +00003940//////////// BEGIN RCEC pool allocator
3941static PoolAlloc* rcec_pool_allocator;
sewardjd86e3a22008-12-03 11:39:37 +00003942
3943static RCEC* alloc_RCEC ( void ) {
philippe6643e962012-01-17 21:16:30 +00003944 return VG_(allocEltPA) ( rcec_pool_allocator );
sewardjd86e3a22008-12-03 11:39:37 +00003945}
3946
3947static void free_RCEC ( RCEC* rcec ) {
3948 tl_assert(rcec->magic == RCEC_MAGIC);
philippe6643e962012-01-17 21:16:30 +00003949 VG_(freeEltPA)( rcec_pool_allocator, rcec );
sewardjd86e3a22008-12-03 11:39:37 +00003950}
philippe6643e962012-01-17 21:16:30 +00003951//////////// END RCEC pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00003952
3953
sewardjf98e1c02008-10-25 16:22:41 +00003954/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3955 move it one step closer the the front of the list, so as to make
3956 move it one step closer to the front of the list, so as to make
3957static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3958{
3959 RCEC *ec0, *ec1, *ec2;
3960 if (ec == *headp)
3961 tl_assert(0); /* already at head of list */
3962 tl_assert(ec != NULL);
3963 ec0 = *headp;
3964 ec1 = NULL;
3965 ec2 = NULL;
3966 while (True) {
3967 if (ec0 == NULL || ec0 == ec) break;
3968 ec2 = ec1;
3969 ec1 = ec0;
3970 ec0 = ec0->next;
3971 }
3972 tl_assert(ec0 == ec);
3973 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3974 RCEC* tmp;
3975 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3976 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3977 closer to the start of the list. */
3978 tl_assert(ec2->next == ec1);
3979 tl_assert(ec1->next == ec0);
3980 tmp = ec0->next;
3981 ec2->next = ec0;
3982 ec0->next = ec1;
3983 ec1->next = tmp;
3984 }
3985 else
3986 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3987 /* it's second in the list. */
3988 tl_assert(*headp == ec1);
3989 tl_assert(ec1->next == ec0);
3990 ec1->next = ec0->next;
3991 ec0->next = ec1;
3992 *headp = ec0;
3993 }
3994}
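/* The transpose heuristic in action (sketch): looking up C in the
 bucket chain A -> B -> C -> D leaves it as A -> C -> B -> D, so
 frequently-requested RCECs drift towards the bucket head at a cost
 of one pointer swap per hit. */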
3995
3996
3997/* Find the given RCEC in the hash table, and return a pointer to it. Or,
3998 if not present, add the given one to the tree (by making a copy of
3999 it, so the caller can immediately deallocate the original) and
4000 return a pointer to the copy. The caller can safely have 'example'
4001 on its stack, since we will always return a pointer to a copy of
4002 it, not to the original. Note that the inserted node will have .rc
4003 of zero and so the caller must immediately increment it. */
4004__attribute__((noinline))
4005static RCEC* ctxt__find_or_add ( RCEC* example )
4006{
4007 UWord hent;
4008 RCEC* copy;
4009 tl_assert(example && example->magic == RCEC_MAGIC);
4010 tl_assert(example->rc == 0);
4011
4012 /* Search the hash table to see if we already have it. */
4013 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004014 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004015 copy = contextTab[hent];
4016 while (1) {
4017 if (!copy) break;
4018 tl_assert(copy->magic == RCEC_MAGIC);
4019 stats__ctxt_tab_cmps++;
4020 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4021 copy = copy->next;
4022 }
4023
4024 if (copy) {
4025 tl_assert(copy != example);
4026 /* optimisation: if it's not at the head of its list, move 1
4027 step fwds, to make future searches cheaper */
4028 if (copy != contextTab[hent]) {
4029 move_RCEC_one_step_forward( &contextTab[hent], copy );
4030 }
4031 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004032 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004033 tl_assert(copy != example);
4034 *copy = *example;
4035 copy->next = contextTab[hent];
4036 contextTab[hent] = copy;
4037 stats__ctxt_tab_curr++;
4038 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4039 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4040 }
4041 return copy;
4042}
4043
4044static inline UWord ROLW ( UWord w, Int n )
4045{
4046 Int bpw = 8 * sizeof(UWord);
4047 w = (w << n) | (w >> (bpw-n));
4048 return w;
4049}
4050
4051__attribute__((noinline))
4052static RCEC* get_RCEC ( Thr* thr )
4053{
4054 UWord hash, i;
4055 RCEC example;
4056 example.magic = RCEC_MAGIC;
4057 example.rc = 0;
4058 example.rcX = 0;
florian195623b2013-01-22 00:25:05 +00004059 example.next = NULL;
njn6c83d5e2009-05-05 23:46:24 +00004060 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004061 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004062 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004063 hash ^= example.frames[i];
4064 hash = ROLW(hash, 19);
4065 }
njn6c83d5e2009-05-05 23:46:24 +00004066 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004067 return ctxt__find_or_add( &example );
4068}
4069
4070///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004071//// Part (2):
4072/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004073///
4074
4075// (UInt) `echo "Old Reference Information" | md5sum`
4076#define OldRef_MAGIC 0x30b1f075UL
4077
sewardjffce8152011-06-24 10:09:41 +00004078/* Records an access: a thread, a context (a stack trace), the access
4079 size & writeness, and the set of locks then held in W mode. The size
4080 (1,2,4,8) is encoded in 2 bits as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004081*/
sewardjffce8152011-06-24 10:09:41 +00004082typedef
4083 struct {
4084 RCEC* rcec;
4085 WordSetID locksHeldW;
4086 UInt thrid : SCALARTS_N_THRBITS;
4087 UInt szLg2B : 2;
4088 UInt isW : 1;
4089 }
4090 Thr_n_RCEC;
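/* Round trip of the 2-bit size encoding (sketch; matches the switch
 in event_map_bind and the decode in libhb_event_map_lookup below):

 szB: 1 2 4 8
 szLg2B: 0 1 2 3 // value stored in the bitfield
 decode: (SizeT)1 << szLg2B // recovers szB exactly
*/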
sewardjf98e1c02008-10-25 16:22:41 +00004091
sewardj849b0ed2008-12-21 10:43:10 +00004092#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004093
4094typedef
4095 struct {
sewardjd86e3a22008-12-03 11:39:37 +00004096 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004097 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00004098 /* or free list when not in use */
sewardjffce8152011-06-24 10:09:41 +00004099 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004100 Thr_n_RCEC accs[N_OLDREF_ACCS];
4101 }
4102 OldRef;
4103
sewardjd86e3a22008-12-03 11:39:37 +00004104
philippe6643e962012-01-17 21:16:30 +00004105//////////// BEGIN OldRef pool allocator
4106static PoolAlloc* oldref_pool_allocator;
sewardjd86e3a22008-12-03 11:39:37 +00004107
4108static OldRef* alloc_OldRef ( void ) {
philippe6643e962012-01-17 21:16:30 +00004109 return VG_(allocEltPA) ( oldref_pool_allocator );
sewardjd86e3a22008-12-03 11:39:37 +00004110}
4111
4112static void free_OldRef ( OldRef* r ) {
4113 tl_assert(r->magic == OldRef_MAGIC);
philippe6643e962012-01-17 21:16:30 +00004114 VG_(freeEltPA)( oldref_pool_allocator, r );
sewardjd86e3a22008-12-03 11:39:37 +00004115}
philippe6643e962012-01-17 21:16:30 +00004116//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004117
sewardjd86e3a22008-12-03 11:39:37 +00004118
sewardjbc307e52008-12-06 22:10:54 +00004119static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
4120static UWord oldrefGen = 0; /* current LRU generation # */
4121static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
4122static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00004123
sewardj1669cc72008-12-13 01:20:21 +00004124inline static UInt min_UInt ( UInt a, UInt b ) {
4125 return a < b ? a : b;
4126}
4127
sewardja781be62008-12-08 00:12:28 +00004128/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4129 first interval is lower, 1 if the first interval is higher, and 0
4130 if there is any overlap. Redundant paranoia with casting is there
4131 following what looked distinctly like a bug in gcc-4.1.2, in which
4132 some of the comparisons were done signedly instead of
4133 unsignedly. */
4134/* Copied from exp-ptrcheck/sg_main.c */
4135static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4136 Addr a2, SizeT n2 ) {
4137 UWord a1w = (UWord)a1;
4138 UWord n1w = (UWord)n1;
4139 UWord a2w = (UWord)a2;
4140 UWord n2w = (UWord)n2;
4141 tl_assert(n1w > 0 && n2w > 0);
4142 if (a1w + n1w <= a2w) return -1L;
4143 if (a2w + n2w <= a1w) return 1L;
4144 return 0;
4145}
4146
sewardjc5ea9962008-12-07 01:41:46 +00004147static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004148{
sewardjd86e3a22008-12-03 11:39:37 +00004149 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004150 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004151 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004152 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004153 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004154
sewardjffce8152011-06-24 10:09:41 +00004155 tl_assert(thr);
4156 ThrID thrid = thr->thrid;
4157 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4158
4159 WordSetID locksHeldW = thr->hgthread->locksetW;
4160
sewardjc5ea9962008-12-07 01:41:46 +00004161 rcec = get_RCEC( thr );
4162 ctxt__rcinc(rcec);
4163
sewardjffce8152011-06-24 10:09:41 +00004164 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004165 switch (szB) {
4166 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004167 case 1: szLg2B = 0; break;
4168 case 2: szLg2B = 1; break;
4169 case 4: szLg2B = 2; break;
4170 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004171 default: tl_assert(0);
4172 }
4173
sewardjffce8152011-06-24 10:09:41 +00004174 /* Look in the map to see if we already have a record for this
4175 address. */
philippe40648e22015-04-11 11:42:22 +00004176 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004177
sewardjd86e3a22008-12-03 11:39:37 +00004178 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004179
4180 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004181 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004182 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004183 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004184 tl_assert(ref->magic == OldRef_MAGIC);
4185
sewardjf98e1c02008-10-25 16:22:41 +00004186 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004187 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004188 continue;
sewardjffce8152011-06-24 10:09:41 +00004189 if (ref->accs[i].szLg2B != szLg2B)
4190 continue;
4191 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004192 continue;
4193 /* else we have a match, so stop looking. */
4194 break;
sewardjf98e1c02008-10-25 16:22:41 +00004195 }
4196
4197 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004198 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004199 if (i > 0) {
4200 Thr_n_RCEC tmp = ref->accs[i-1];
4201 ref->accs[i-1] = ref->accs[i];
4202 ref->accs[i] = tmp;
4203 i--;
4204 }
sewardjc5ea9962008-12-07 01:41:46 +00004205 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004206 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004207 ctxt__rcdec( ref->accs[i].rcec );
4208 tl_assert(ref->accs[i].thrid == thrid);
4209 /* Update the RCEC and the W-held lockset. */
4210 ref->accs[i].rcec = rcec;
4211 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004212 } else {
sewardjffce8152011-06-24 10:09:41 +00004213 /* No entry for this (thread, R/W, size, nWHeld) quad.
4214 Shuffle all of them down one slot, and put the new entry
4215 at the start of the array. */
4216 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004217 /* the last slot is in use. We must dec the rc on the
4218 associated rcec. */
4219 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4220 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004221 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4222 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004223 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004224 } else {
4225 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4226 }
4227 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4228 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004229 ref->accs[0].thrid = thrid;
4230 ref->accs[0].szLg2B = szLg2B;
4231 ref->accs[0].isW = (UInt)(isW & 1);
4232 ref->accs[0].locksHeldW = locksHeldW;
4233 ref->accs[0].rcec = rcec;
4234 /* thrid==0 is used to signify an empty slot, so we can't
4235 add zero thrid (such a ThrID is invalid anyway). */
4236 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004237 }
4238
4239 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00004240
4241 } else {
4242
4243 /* We don't have a record for this address. Create a new one. */
4244 if (oldrefTreeN >= oldrefGenIncAt) {
4245 oldrefGen++;
4246 oldrefGenIncAt = oldrefTreeN + 50000;
4247 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
4248 oldrefGen, oldrefTreeN );
4249 }
sewardjd86e3a22008-12-03 11:39:37 +00004250
4251 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00004252 ref->magic = OldRef_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00004253 ref->gen = oldrefGen;
4254 ref->accs[0].thrid = thrid;
4255 ref->accs[0].szLg2B = szLg2B;
4256 ref->accs[0].isW = (UInt)(isW & 1);
4257 ref->accs[0].locksHeldW = locksHeldW;
4258 ref->accs[0].rcec = rcec;
4259
4260 /* thrid==0 is used to signify an empty slot, so we can't
4261 add zero thrid (such a ThrID is invalid anyway). */
4262 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4263
4264 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004265 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004266 ref->accs[j].rcec = NULL;
4267 ref->accs[j].thrid = 0;
4268 ref->accs[j].szLg2B = 0;
4269 ref->accs[j].isW = 0;
4270 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004271 }
sewardjbc307e52008-12-06 22:10:54 +00004272 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00004273 oldrefTreeN++;
4274
4275 }
4276}
4277
4278
sewardjffce8152011-06-24 10:09:41 +00004279/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004280Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004281 /*OUT*/Thr** resThr,
4282 /*OUT*/SizeT* resSzB,
4283 /*OUT*/Bool* resIsW,
4284 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004285 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004286{
sewardja781be62008-12-08 00:12:28 +00004287 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004288 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004289 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004290 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004291
sewardjffce8152011-06-24 10:09:41 +00004292 ThrID cand_thrid;
4293 RCEC* cand_rcec;
4294 Bool cand_isW;
4295 SizeT cand_szB;
4296 WordSetID cand_locksHeldW;
4297 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004298
4299 Addr toCheck[15];
4300 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004301
4302 tl_assert(thr);
4303 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004304
sewardjffce8152011-06-24 10:09:41 +00004305 ThrID thrid = thr->thrid;
4306
sewardja781be62008-12-08 00:12:28 +00004307 toCheck[nToCheck++] = a;
4308 for (i = -7; i < (Word)szB; i++) {
4309 if (i != 0)
4310 toCheck[nToCheck++] = a + i;
4311 }
4312 tl_assert(nToCheck <= 15);
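 /* Why 15 suffices: the loop visits a itself plus every address from
 a-7 up to a+szB-1 (skipping a), i.e. 7 + szB <= 15 candidates, and
 a-7 is the lowest address at which an earlier recorded access of
 at most 8 bytes could still overlap [a, a+szB). */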
4313
4314 /* Now see if we can find a suitable matching event for
4315 any of the addresses in toCheck[0 .. nToCheck-1]. */
4316 for (j = 0; j < nToCheck; j++) {
4317
4318 cand_a = toCheck[j];
4319 // VG_(printf)("test %ld %p\n", j, cand_a);
4320
philippe40648e22015-04-11 11:42:22 +00004321 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004322 if (!b)
4323 continue;
4324
sewardjd86e3a22008-12-03 11:39:37 +00004325 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004326 tl_assert(ref->magic == OldRef_MAGIC);
sewardjffce8152011-06-24 10:09:41 +00004327 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004328
sewardjffce8152011-06-24 10:09:41 +00004329 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4330 cand_rcec = NULL;
4331 cand_isW = False;
4332 cand_szB = 0;
4333 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004334
sewardjc5ea9962008-12-07 01:41:46 +00004335 for (i = 0; i < N_OLDREF_ACCS; i++) {
4336 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004337 cand_rcec = cand->rcec;
4338 cand_thrid = cand->thrid;
4339 cand_isW = (Bool)cand->isW;
4340 cand_szB = 1 << cand->szLg2B;
4341 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004342
sewardjffce8152011-06-24 10:09:41 +00004343 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004344 /* This slot isn't in use. Ignore it. */
4345 continue;
4346
sewardjffce8152011-06-24 10:09:41 +00004347 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004348 /* This is an access by the same thread, but we're only
4349 interested in accesses from other threads. Ignore. */
4350 continue;
4351
4352 if ((!cand_isW) && (!isW))
4353 /* We don't want to report a read racing against another
4354 read; that's stupid. So in this case move on. */
4355 continue;
4356
sewardja781be62008-12-08 00:12:28 +00004357 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4358 /* No overlap with the access we're asking about. Ignore. */
4359 continue;
4360
sewardjc5ea9962008-12-07 01:41:46 +00004361 /* We have a match. Stop searching. */
4362 break;
4363 }
4364
4365 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4366
sewardja781be62008-12-08 00:12:28 +00004367 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004368 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004369 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004370 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004371 tl_assert(cand_rcec);
4372 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4373 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004374 /* Count how many non-zero frames we have. */
4375 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4376 for (n = 0; n < maxNFrames; n++) {
4377 if (0 == cand_rcec->frames[n]) break;
4378 }
sewardjffce8152011-06-24 10:09:41 +00004379 *resEC = VG_(make_ExeContext_from_StackTrace)
4380 (cand_rcec->frames, n);
4381 *resThr = Thr__from_ThrID(cand_thrid);
4382 *resSzB = cand_szB;
4383 *resIsW = cand_isW;
4384 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004385 return True;
4386 }
sewardjc5ea9962008-12-07 01:41:46 +00004387
sewardja781be62008-12-08 00:12:28 +00004388 /* consider next address in toCheck[] */
4389 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004390
sewardja781be62008-12-08 00:12:28 +00004391 /* really didn't find anything. */
4392 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004393}
4394
4395static void event_map_init ( void )
4396{
4397 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004398
philippe6643e962012-01-17 21:16:30 +00004399 /* Context (RCEC) pool allocator */
4400 rcec_pool_allocator = VG_(newPA) (
4401 sizeof(RCEC),
4402 1000 /* RCECs per pool */,
4403 HG_(zalloc),
4404 "libhb.event_map_init.1 (RCEC pools)",
4405 HG_(free)
4406 );
sewardjd86e3a22008-12-03 11:39:37 +00004407
4408 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004409 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004410 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004411 N_RCEC_TAB * sizeof(RCEC*) );
sewardjf98e1c02008-10-25 16:22:41 +00004412 for (i = 0; i < N_RCEC_TAB; i++)
4413 contextTab[i] = NULL;
4414
philippe6643e962012-01-17 21:16:30 +00004415 /* Oldref pool allocator */
4416 oldref_pool_allocator = VG_(newPA)(
4417 sizeof(OldRef),
4418 1000 /* OldRefs per pool */,
4419 HG_(zalloc),
4420 "libhb.event_map_init.3 (OldRef pools)",
4421 HG_(free)
4422 );
sewardjd86e3a22008-12-03 11:39:37 +00004423
sewardjd86e3a22008-12-03 11:39:37 +00004424 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004425 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004426 oldrefTree = VG_(newSWA)(
4427 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004428 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004429 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004430 );
sewardjf98e1c02008-10-25 16:22:41 +00004431
4432 oldrefGen = 0;
4433 oldrefGenIncAt = 0;
4434 oldrefTreeN = 0;
4435}
4436
4437static void event_map__check_reference_counts ( Bool before )
4438{
4439 RCEC* rcec;
4440 OldRef* oldref;
4441 Word i;
4442 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004443 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004444
4445 /* Set the 'check' reference counts to zero. Also, optionally
4446 check that the real reference counts are non-zero. We allow
4447 these to fall to zero before a GC, but the GC must get rid of
4448 all those that are zero, hence none should be zero after a
4449 GC. */
4450 for (i = 0; i < N_RCEC_TAB; i++) {
4451 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4452 nEnts++;
4453 tl_assert(rcec);
4454 tl_assert(rcec->magic == RCEC_MAGIC);
4455 if (!before)
4456 tl_assert(rcec->rc > 0);
4457 rcec->rcX = 0;
4458 }
4459 }
4460
4461 /* check that the stats are sane */
4462 tl_assert(nEnts == stats__ctxt_tab_curr);
4463 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4464
4465   /* visit all the referencing points, incrementing the check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004466 VG_(initIterSWA)( oldrefTree );
4467 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004468 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004469 tl_assert(oldref->magic == OldRef_MAGIC);
4470 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004471 ThrID aThrID = oldref->accs[i].thrid;
4472 RCEC* aRef = oldref->accs[i].rcec;
4473 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004474 tl_assert(aRef);
4475 tl_assert(aRef->magic == RCEC_MAGIC);
4476 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004477 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004478 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004479 }
4480 }
4481 }
4482
4483 /* compare check ref counts with actual */
4484 for (i = 0; i < N_RCEC_TAB; i++) {
4485 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4486 tl_assert(rcec->rc == rcec->rcX);
4487 }
4488 }
4489}
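
/* The checker above is an instance of a general two-pass reference
   count audit: zero a shadow counter on every object, walk every
   referencing site bumping the shadow counter, then require
   shadow == real everywhere.  A self-contained sketch of the idea,
   using a hypothetical ExampleObj type rather than the RCEC
   machinery itself: */
#if 0
typedef struct { UWord rc; UWord rcX; } ExampleObj;

static void example_refcount_audit ( ExampleObj** objs, UWord nObjs,
                                     ExampleObj** refs, UWord nRefs )
{
   UWord i;
   for (i = 0; i < nObjs; i++)
      objs[i]->rcX = 0;                         /* pass 1: clear shadows */
   for (i = 0; i < nRefs; i++)
      refs[i]->rcX++;                           /* pass 2: recount */
   for (i = 0; i < nObjs; i++)
      tl_assert(objs[i]->rc == objs[i]->rcX);   /* counts must agree */
}
#endif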
4490
sewardj8fd92d32008-11-20 23:17:01 +00004491__attribute__((noinline))
philippe158404e2015-04-10 19:34:14 +00004492static void event_map_GC ( void )
sewardjf98e1c02008-10-25 16:22:41 +00004493{
4494 OldRef* oldref;
4495 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00004496 XArray* refs2del;
4497 Word i, j, n2del;
4498
sewardj8fd92d32008-11-20 23:17:01 +00004499 UWord* genMap = NULL;
4500 UWord genMap_min = 0;
4501 UWord genMap_size = 0;
4502
sewardjf98e1c02008-10-25 16:22:41 +00004503 if (0)
4504 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
4505
sewardj849b0ed2008-12-21 10:43:10 +00004506 /* Check for sane command line params. Limit values must match
4507 those in hg_process_cmd_line_option. */
4508 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
sewardjf585e482009-08-16 22:52:29 +00004509 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
sewardj849b0ed2008-12-21 10:43:10 +00004510
sewardj8f5374e2008-12-07 11:40:17 +00004511 /* Check our counting is sane (expensive) */
4512 if (CHECK_CEM)
4513 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00004514
sewardj8f5374e2008-12-07 11:40:17 +00004515 /* Check the reference counts (expensive) */
4516 if (CHECK_CEM)
4517 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004518
sewardj8fd92d32008-11-20 23:17:01 +00004519 /* Compute the distribution of generation values in the ref tree.
4520 There are likely only to be a few different generation numbers
4521 in the whole tree, but we don't know what they are. Hence use a
4522 dynamically resized array of counters. The array is genMap[0
4523 .. genMap_size-1], where genMap[0] is the count for the
4524 generation number genMap_min, genMap[1] is the count for
4525 genMap_min+1, etc. If a new number is seen outside the range
4526 [genMap_min .. genMap_min + genMap_size - 1] then the array is
4527 copied into a larger array, and genMap_min and genMap_size are
4528 adjusted accordingly. */
4529
sewardjf98e1c02008-10-25 16:22:41 +00004530 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00004531
sewardjbc307e52008-12-06 22:10:54 +00004532 VG_(initIterSWA)( oldrefTree );
4533 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00004534
sewardjd86e3a22008-12-03 11:39:37 +00004535 UWord ea, key;
4536 oldref = (OldRef*)valW;
4537 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00004538
4539 /* BEGIN find 'ea', which is the index in genMap holding the
4540 count for generation number 'key'. */
4541 if (UNLIKELY(genMap == NULL)) {
4542 /* deal with the first key to be seen, so that the following
4543 cases don't need to handle the complexity of a NULL count
4544 array. */
4545 genMap_min = key;
4546 genMap_size = 1;
4547 genMap = HG_(zalloc)( "libhb.emmG.1a",
4548 genMap_size * sizeof(UWord) );
4549 ea = 0;
4550 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
4551                            key, genMap_min, genMap_min+genMap_size-1 );
sewardjf98e1c02008-10-25 16:22:41 +00004552 }
sewardj8fd92d32008-11-20 23:17:01 +00004553 else
4554 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
4555 /* this is the expected (almost-always-happens) case: 'key'
4556 is already mapped in the array. */
4557 ea = key - genMap_min;
4558 }
4559 else
4560 if (key < genMap_min) {
4561 /* 'key' appears before the start of the current array.
4562 Extend the current array by allocating a larger one and
4563 copying the current one to the upper end of it. */
4564 Word more;
4565 UWord* map2;
4566 more = genMap_min - key;
4567 tl_assert(more > 0);
4568 map2 = HG_(zalloc)( "libhb.emmG.1b",
4569 (genMap_size + more) * sizeof(UWord) );
4570 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
4571 HG_(free)( genMap );
4572 genMap = map2;
4573 genMap_size += more;
4574 genMap_min -= more;
4575 ea = 0;
4576 tl_assert(genMap_min == key);
4577 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
4578                            key, genMap_min, genMap_min+genMap_size-1 );
4579 }
4580 else {
4581 /* 'key' appears after the end of the current array. Extend
4582 the current array by allocating a larger one and copying
4583 the current one to the lower end of it. */
4584 Word more;
4585 UWord* map2;
4586 tl_assert(key >= genMap_min + genMap_size);
4587 more = key - (genMap_min + genMap_size) + 1;
4588 tl_assert(more > 0);
4589 map2 = HG_(zalloc)( "libhb.emmG.1c",
4590 (genMap_size + more) * sizeof(UWord) );
4591 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4592 HG_(free)( genMap );
4593 genMap = map2;
4594 genMap_size += more;
4595         ea = genMap_size - 1;
4596 tl_assert(genMap_min + genMap_size - 1 == key);
4597 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
4598                            key, genMap_min, genMap_min+genMap_size-1 );
4599 }
4600 /* END find 'ea' from 'key' */
4601
4602 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00004603 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00004604 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00004605 }
4606
sewardj8fd92d32008-11-20 23:17:01 +00004607 tl_assert(genMap);
4608 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00004609
sewardj8fd92d32008-11-20 23:17:01 +00004610 /* Sanity check what we just computed */
4611 { UWord sum = 0;
4612 for (i = 0; i < genMap_size; i++) {
4613 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
4614 i + genMap_min, genMap[i] );
4615 sum += genMap[i];
4616 }
4617 tl_assert(sum == oldrefTreeN);
4618 }
4619
4620 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00004621 retained = oldrefTreeN;
4622 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00004623
4624 for (i = 0; i < genMap_size; i++) {
4625 keyW = i + genMap_min;
4626 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00004627 tl_assert(keyW > 0); /* can't allow a generation # 0 */
4628 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
4629 tl_assert(keyW >= maxGen);
4630 tl_assert(retained >= valW);
4631 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00004632 > (UWord)(HG_(clo_conflict_cache_size)
4633 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00004634 retained -= valW;
4635 maxGen = keyW;
4636 } else {
4637 break;
4638 }
4639 }
sewardjf98e1c02008-10-25 16:22:41 +00004640
sewardj8fd92d32008-11-20 23:17:01 +00004641 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00004642
sewardj9b1f0fd2008-11-18 23:40:00 +00004643 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00004644
4645 /* Now make up a big list of the oldrefTree entries we want to
4646 delete. We can't simultaneously traverse the tree and delete
4647 stuff from it, so first we need to copy them off somewhere
4648 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00004649 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00004650 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00004651
sewardj9b1f0fd2008-11-18 23:40:00 +00004652 if (retained < oldrefTreeN) {
4653
4654 /* This is the normal (expected) case. We discard any ref whose
4655 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00004656 VG_(initIterSWA)( oldrefTree );
4657 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004658 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004659 tl_assert(oldref->magic == OldRef_MAGIC);
4660 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00004661 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004662 }
sewardjf98e1c02008-10-25 16:22:41 +00004663 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004664 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004665 VG_(message)(Vg_DebugMsg,
4666 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00004667 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004668 maxGen, retained );
4669 }
4670
4671 } else {
4672
4673 static UInt rand_seed = 0; /* leave as static */
4674
4675 /* Degenerate case: there's only one generation in the entire
4676 tree, so we need to have some other way of deciding which
4677 refs to throw away. Just throw out half of them randomly. */
4678 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00004679 VG_(initIterSWA)( oldrefTree );
4680 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004681 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00004682 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004683 tl_assert(oldref->magic == OldRef_MAGIC);
4684 n = VG_(random)( &rand_seed );
4685 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00004686 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004687 retained--;
4688 }
4689 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004690 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004691 VG_(message)(Vg_DebugMsg,
4692 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00004693 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004694 retained );
4695 }
4696
sewardjf98e1c02008-10-25 16:22:41 +00004697 }
4698
4699 n2del = VG_(sizeXA)( refs2del );
4700 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4701
4702 if (0) VG_(printf)("%s","deleting entries\n");
4703 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00004704 Bool b;
4705 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
philippe40648e22015-04-11 11:42:22 +00004706 b = VG_(delFromSWA)( oldrefTree, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00004707 tl_assert(b);
sewardjd86e3a22008-12-03 11:39:37 +00004708 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004709 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004710 ThrID aThrID = oldref->accs[j].thrid;
4711 RCEC* aRef = oldref->accs[j].rcec;
sewardjc5ea9962008-12-07 01:41:46 +00004712 if (aRef) {
sewardjffce8152011-06-24 10:09:41 +00004713 tl_assert(aThrID != 0);
sewardjf98e1c02008-10-25 16:22:41 +00004714 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004715 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004716 } else {
sewardjffce8152011-06-24 10:09:41 +00004717 tl_assert(aThrID == 0);
sewardjf98e1c02008-10-25 16:22:41 +00004718 }
4719 }
sewardjd86e3a22008-12-03 11:39:37 +00004720
4721 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004722 }
4723
4724 VG_(deleteXA)( refs2del );
4725
sewardjc5ea9962008-12-07 01:41:46 +00004726 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004727
4728 oldrefTreeN = retained;
4729 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4730
4731 /* Throw away all RCECs with zero reference counts */
4732 for (i = 0; i < N_RCEC_TAB; i++) {
4733 RCEC** pp = &contextTab[i];
4734 RCEC* p = *pp;
4735 while (p) {
4736 if (p->rc == 0) {
4737 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004738 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004739 p = *pp;
4740 tl_assert(stats__ctxt_tab_curr > 0);
philippe06bc23a2015-04-17 21:19:43 +00004741 stats__ctxt_rcdec_discards++;
sewardjf98e1c02008-10-25 16:22:41 +00004742 stats__ctxt_tab_curr--;
4743 } else {
4744 pp = &p->next;
4745 p = p->next;
4746 }
4747 }
4748 }
4749
sewardj8f5374e2008-12-07 11:40:17 +00004750 /* Check the reference counts (expensive) */
4751 if (CHECK_CEM)
4752 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004753
4754 //if (0)
4755 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4756 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4757
4758}
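
/* A worked instance of the generation-discard policy used above,
   with hypothetical numbers.  Suppose the histogram is gen 1 -> 400,
   gen 2 -> 300, gen 3 -> 200, gen 4 -> 100 (total 1000 entries) and
   the retention target, HG_(clo_conflict_cache_size) *
   EVENT_MAP_GC_DISCARD_FRACTION, is 550.  Discarding gen 1 still
   leaves 600 > 550 entries, so it goes; discarding gen 2 as well
   would leave only 300, so the loop stops with maxGen == 1 and 600
   entries retained.  Sketch (the example_* names are hypothetical):
*/
#if 0
static UWord example_pick_maxGen ( const UWord* counts, UWord nGens,
                                   UWord total, UWord target,
                                   /*OUT*/UWord* retainedP )
{
   /* counts[g-1] holds the number of entries of generation g.
      Discard whole generations, oldest first, for as long as doing
      so still leaves more than 'target' entries. */
   UWord g, retained = total, maxGen = 0;
   for (g = 1; g <= nGens; g++) {
      UWord valW = counts[g-1];
      if (retained - valW > target) {
         retained -= valW;
         maxGen = g;
      } else {
         break;
      }
   }
   *retainedP = retained;   /* {400,300,200,100}/550 ==> 600, maxGen 1 */
   return maxGen;
}
#endif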
4759
4760
4761/////////////////////////////////////////////////////////
4762// //
4763// Core MSM //
4764// //
4765/////////////////////////////////////////////////////////
4766
sewardj23f12002009-07-24 08:45:08 +00004767/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4768 Nov 08, and again after [...],
4769 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004770
sewardj23f12002009-07-24 08:45:08 +00004771static ULong stats__msmcread = 0;
4772static ULong stats__msmcread_change = 0;
4773static ULong stats__msmcwrite = 0;
4774static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004775
sewardj8ab2c132009-08-02 09:34:35 +00004776/* Some notes on the H1 history mechanism:
4777
4778 Transition rules are:
4779
4780 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4781 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4782
4783 After any access by a thread T to a location L, L's constraint pair
4784 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4785
4786 After a race by thread T conflicting with some previous access by
4787 some other thread U, for a location with constraint (before
4788   processing the later access) (Cr,Cw), Cw[U] is the segment in
4789   which the previous access lies.
4790
4791 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4792 are compared so as to find out which thread(s) this access
4793 conflicts with. Once that is established, we also require the
4794 pre-update Cw for the location, so we can index into it for those
4795 threads, to get the scalar clock values for the point at which the
4796 former accesses were made. (In fact we only bother to do any of
4797 this for an arbitrarily chosen one of the conflicting threads, as
4798 that's simpler, it avoids flooding the user with vast amounts of
4799 mostly useless information, and because the program is wrong if it
4800 contains any races at all -- so we don't really need to show all
4801 conflicting access pairs initially, so long as we only show none if
4802 none exist).
4803
4804 ---
4805
4806 That requires the auxiliary proof that
4807
4808 (Cr `join` Kw)[T] == Kw[T]
4809
4810 Why should that be true? Because for any thread T, Kw[T] >= the
4811 scalar clock value for T known by any other thread. In other
4812 words, because T's value for its own scalar clock is at least as up
4813 to date as the value for it known by any other thread (that is true
4814 for both the R- and W- scalar clocks). Hence no other thread will
4815 be able to feed in a value for that element (indirectly via a
4816 constraint) which will exceed Kw[T], and hence the join cannot
4817 cause that particular element to advance.
4818*/
4819
sewardjf98e1c02008-10-25 16:22:41 +00004820__attribute__((noinline))
4821static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004822 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004823 VtsID Cfailed,
4824 VtsID Kfailed,
4825 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004826{
sewardjc5ea9962008-12-07 01:41:46 +00004827 /* Call here to report a race. We just hand it onwards to
4828 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004829 error is going to be collected, then, at history_level 2, that
4830 queries the conflicting-event map. The alternative would be to
4831 query it right here. But that causes a lot of pointless queries
4832 for errors which will shortly be discarded as duplicates, and
4833 can become a performance overhead; so we defer the query until
4834 we know the error is not a duplicate. */
4835
4836 /* Stacks for the bounds of the (or one of the) conflicting
4837 segment(s). These are only set at history_level 1. */
4838 ExeContext* hist1_seg_start = NULL;
4839 ExeContext* hist1_seg_end = NULL;
4840 Thread* hist1_conf_thr = NULL;
4841
4842 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00004843 tl_assert(acc_thr->hgthread);
4844 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00004845 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4846
4847 if (HG_(clo_history_level) == 1) {
4848 Bool found;
4849 Word firstIx, lastIx;
4850 ULong_n_EC key;
4851
4852 /* At history_level 1, we must round up the relevant stack-pair
4853 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004854 deferring it is complex; we can't (easily) put Kfailed and
4855 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004856 getting tied up in difficulties with VtsID reference
4857 counting. So just do it now. */
4858 Thr* confThr;
4859 ULong confTym = 0;
4860 /* Which thread are we in conflict with? There may be more than
4861 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4862 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004863 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004864      /* This must exist, since if it were NULL then there would be no
sewardj8ab2c132009-08-02 09:34:35 +00004865 conflict (semantics of return value of
4866 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4867 called us, just checked exactly this -- that there was in
4868 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004869 tl_assert(confThr);
4870
4871 /* Get the scalar clock value that the conflicting thread
4872 introduced into the constraint. A careful examination of the
4873 base machine rules shows that this must be the same as the
4874 conflicting thread's scalar clock when it created this
4875 constraint. Hence we know the scalar clock of the
4876 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004877 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004878
4879 /* Using this scalar clock, index into the conflicting thread's
4880 collection of stack traces made each time its vector clock
4881 (hence its scalar clock) changed. This gives the stack
4882 traces at the start and end of the conflicting segment (well,
4883 as per comment just above, of one of the conflicting
4884 segments, if there are more than one). */
4885 key.ull = confTym;
4886 key.ec = NULL;
4887 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004888 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004889 firstIx = lastIx = 0;
4890 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004891 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004892 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00004893 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00004894 );
sewardj8ab2c132009-08-02 09:34:35 +00004895 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004896 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004897 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004898 confThr, confTym, found, firstIx, lastIx);
4899 /* We can't indefinitely collect stack traces at VTS
4900 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004901 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004902 ones, which in turn means we might fail to find index value
4903 confTym in the array. */
4904 if (found) {
4905 ULong_n_EC *pair_start, *pair_end;
4906 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004907 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004908 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004909 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004910 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004911 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004912 lastIx+1 );
4913 /* from properties of VG_(lookupXA) and the comparison fn used: */
4914 tl_assert(pair_start->ull < pair_end->ull);
4915 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004916 /* Could do a bit better here. It may be that pair_end
4917 doesn't have a stack, but the following entries in the
4918               array have the same scalar Kw and do have a stack.  So
4919 we should search a bit further along the array than
4920 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004921 } else {
sewardjffce8152011-06-24 10:09:41 +00004922 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00004923 hist1_seg_end = main_get_EC( confThr );
4924 }
4925 // seg_start could be NULL iff this is the first stack in the thread
4926 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4927 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00004928 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00004929 }
4930 }
4931
sewardj60626642011-03-10 15:14:37 +00004932 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004933 szB, isWrite,
4934 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004935}
4936
4937static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004938 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004939 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004940 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4941 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004942}
4943
4944
4945/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004946static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004947 /* The following are only needed for
4948 creating error reports. */
4949 Thr* acc_thr,
4950 Addr acc_addr, SizeT szB )
4951{
4952 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004953 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004954
4955 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004956 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004957 tl_assert(is_sane_SVal_C(svOld));
4958 }
4959
sewardj1c0ce7a2009-07-01 08:10:49 +00004960 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004961 VtsID tviR = acc_thr->viR;
4962 VtsID tviW = acc_thr->viW;
4963 VtsID rmini = SVal__unC_Rmin(svOld);
4964 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004965 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4966 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004967 /* no race */
4968 /* Note: RWLOCK subtlety: use tviW, not tviR */
4969 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4970 goto out;
4971 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004972 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004973 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4974 tl_assert(leqxx);
4975 // same as in non-race case
4976 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4977 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004978 rmini, /* Cfailed */
4979 tviR, /* Kfailed */
4980 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004981 goto out;
4982 }
4983 }
4984 if (SVal__isA(svOld)) {
4985 /* reading no-access memory (sigh); leave unchanged */
4986 /* check for no pollution */
4987 tl_assert(svOld == SVal_NOACCESS);
4988 svNew = SVal_NOACCESS;
4989 goto out;
4990 }
sewardj23f12002009-07-24 08:45:08 +00004991 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004992 tl_assert(0);
4993
4994 out:
sewardj8f5374e2008-12-07 11:40:17 +00004995 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004996 tl_assert(is_sane_SVal_C(svNew));
4997 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004998 if (UNLIKELY(svNew != svOld)) {
4999 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005000 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005001 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005002 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005003 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005004 }
5005 }
5006 return svNew;
5007}
5008
5009
5010/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005011static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005012 /* The following are only needed for
5013 creating error reports. */
5014 Thr* acc_thr,
5015 Addr acc_addr, SizeT szB )
5016{
5017 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005018 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005019
5020 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005021 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005022 tl_assert(is_sane_SVal_C(svOld));
5023 }
5024
sewardj1c0ce7a2009-07-01 08:10:49 +00005025 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005026 VtsID tviW = acc_thr->viW;
5027 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005028 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5029 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005030 /* no race */
5031 svNew = SVal__mkC( tviW, tviW );
5032 goto out;
5033 } else {
5034 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005035 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005036 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5037 tl_assert(leqxx);
5038 // same as in non-race case
5039 // proof: in the non-race case, we have
5040 // rmini <= wmini (invar on constraints)
5041 // tviW <= tviR (invar on thread clocks)
5042 // wmini <= tviW (from run-time check)
5043 // hence from transitivity of <= we have
5044 // rmini <= wmini <= tviW
5045 // and so join(rmini,tviW) == tviW
5046 // and join(wmini,tviW) == tviW
5047 // qed.
5048 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5049 VtsID__join2(wmini, tviW) );
5050 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005051 wmini, /* Cfailed */
5052 tviW, /* Kfailed */
5053 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005054 goto out;
5055 }
5056 }
5057 if (SVal__isA(svOld)) {
5058 /* writing no-access memory (sigh); leave unchanged */
5059 /* check for no pollution */
5060 tl_assert(svOld == SVal_NOACCESS);
5061 svNew = SVal_NOACCESS;
5062 goto out;
5063 }
sewardj23f12002009-07-24 08:45:08 +00005064 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005065 tl_assert(0);
5066
5067 out:
sewardj8f5374e2008-12-07 11:40:17 +00005068 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005069 tl_assert(is_sane_SVal_C(svNew));
5070 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005071 if (UNLIKELY(svNew != svOld)) {
5072 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005073 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005074 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005075 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005076 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005077 }
5078 }
5079 return svNew;
5080}
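
/* A scalar instance of the proof in the race branch above, with
   hypothetical numbers.  When the run-time check passes (wmini <=
   tviW), taking rmini = 2, wmini = 3, tviW = 5 gives the chain
   rmini <= wmini <= tviW, so both joins collapse to tviW and the
   general join formula installs the same constraint, (tviW, tviW),
   as the fast path.  Join over VTSs is per-element; on scalars it is
   just max. */
#if 0
static void example_msmcwrite_collapse ( void )
{
   UWord rmini = 2, wmini = 3, tviW = 5;
   UWord joinR = rmini > tviW ? rmini : tviW;   /* scalar join == max */
   UWord joinW = wmini > tviW ? wmini : tviW;
   tl_assert(joinR == tviW);
   tl_assert(joinW == tviW);
}
#endif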
5081
5082
5083/////////////////////////////////////////////////////////
5084// //
5085// Apply core MSM to specific memory locations //
5086// //
5087/////////////////////////////////////////////////////////
5088
sewardj23f12002009-07-24 08:45:08 +00005089/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005090
sewardj23f12002009-07-24 08:45:08 +00005091static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005092 CacheLine* cl;
5093 UWord cloff, tno, toff;
5094 SVal svOld, svNew;
5095 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005096 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005097 cl = get_cacheline(a);
5098 cloff = get_cacheline_offset(a);
5099 tno = get_treeno(a);
5100 toff = get_tree_offset(a); /* == 0 .. 7 */
5101 descr = cl->descrs[tno];
5102 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5103 SVal* tree = &cl->svals[tno << 3];
5104 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005105 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005106 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5107 }
5108 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005109 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005110 if (CHECK_ZSM)
5111 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005112 cl->svals[cloff] = svNew;
5113}
5114
sewardj23f12002009-07-24 08:45:08 +00005115static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005116 CacheLine* cl;
5117 UWord cloff, tno, toff;
5118 SVal svOld, svNew;
5119 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005120 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005121 cl = get_cacheline(a);
5122 cloff = get_cacheline_offset(a);
5123 tno = get_treeno(a);
5124 toff = get_tree_offset(a); /* == 0 .. 7 */
5125 descr = cl->descrs[tno];
5126 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5127 SVal* tree = &cl->svals[tno << 3];
5128 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005129 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005130 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5131 }
5132 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005133 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005134 if (CHECK_ZSM)
5135 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005136 cl->svals[cloff] = svNew;
5137}
5138
sewardj23f12002009-07-24 08:45:08 +00005139/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005140
sewardj23f12002009-07-24 08:45:08 +00005141static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005142 CacheLine* cl;
5143 UWord cloff, tno, toff;
5144 SVal svOld, svNew;
5145 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005146 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005147 if (UNLIKELY(!aligned16(a))) goto slowcase;
5148 cl = get_cacheline(a);
5149 cloff = get_cacheline_offset(a);
5150 tno = get_treeno(a);
5151 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5152 descr = cl->descrs[tno];
5153 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5154 if (valid_value_is_below_me_16(descr, toff)) {
5155 goto slowcase;
5156 } else {
5157 SVal* tree = &cl->svals[tno << 3];
5158 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5159 }
sewardj8f5374e2008-12-07 11:40:17 +00005160 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005161 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5162 }
5163 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005164 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005165 if (CHECK_ZSM)
5166 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005167 cl->svals[cloff] = svNew;
5168 return;
5169 slowcase: /* misaligned, or must go further down the tree */
5170 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005171 zsm_sapply08__msmcread( thr, a + 0 );
5172 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005173}
5174
sewardj23f12002009-07-24 08:45:08 +00005175static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005176 CacheLine* cl;
5177 UWord cloff, tno, toff;
5178 SVal svOld, svNew;
5179 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005180 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005181 if (UNLIKELY(!aligned16(a))) goto slowcase;
5182 cl = get_cacheline(a);
5183 cloff = get_cacheline_offset(a);
5184 tno = get_treeno(a);
5185 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5186 descr = cl->descrs[tno];
5187 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5188 if (valid_value_is_below_me_16(descr, toff)) {
5189 goto slowcase;
5190 } else {
5191 SVal* tree = &cl->svals[tno << 3];
5192 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5193 }
sewardj8f5374e2008-12-07 11:40:17 +00005194 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005195 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5196 }
5197 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005198 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005199 if (CHECK_ZSM)
5200 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005201 cl->svals[cloff] = svNew;
5202 return;
5203 slowcase: /* misaligned, or must go further down the tree */
5204 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005205 zsm_sapply08__msmcwrite( thr, a + 0 );
5206 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005207}
5208
sewardj23f12002009-07-24 08:45:08 +00005209/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005210
sewardj23f12002009-07-24 08:45:08 +00005211static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005212 CacheLine* cl;
5213 UWord cloff, tno, toff;
5214 SVal svOld, svNew;
5215 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005216 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005217 if (UNLIKELY(!aligned32(a))) goto slowcase;
5218 cl = get_cacheline(a);
5219 cloff = get_cacheline_offset(a);
5220 tno = get_treeno(a);
5221 toff = get_tree_offset(a); /* == 0 or 4 */
5222 descr = cl->descrs[tno];
5223 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5224 if (valid_value_is_above_me_32(descr, toff)) {
5225 SVal* tree = &cl->svals[tno << 3];
5226 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5227 } else {
5228 goto slowcase;
5229 }
sewardj8f5374e2008-12-07 11:40:17 +00005230 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005231 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5232 }
5233 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005234 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005235 if (CHECK_ZSM)
5236 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005237 cl->svals[cloff] = svNew;
5238 return;
5239 slowcase: /* misaligned, or must go further down the tree */
5240 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005241 zsm_sapply16__msmcread( thr, a + 0 );
5242 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005243}
5244
sewardj23f12002009-07-24 08:45:08 +00005245static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005246 CacheLine* cl;
5247 UWord cloff, tno, toff;
5248 SVal svOld, svNew;
5249 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005250 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005251 if (UNLIKELY(!aligned32(a))) goto slowcase;
5252 cl = get_cacheline(a);
5253 cloff = get_cacheline_offset(a);
5254 tno = get_treeno(a);
5255 toff = get_tree_offset(a); /* == 0 or 4 */
5256 descr = cl->descrs[tno];
5257 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5258 if (valid_value_is_above_me_32(descr, toff)) {
5259 SVal* tree = &cl->svals[tno << 3];
5260 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5261 } else {
5262 goto slowcase;
5263 }
sewardj8f5374e2008-12-07 11:40:17 +00005264 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005265 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5266 }
5267 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005268 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005269 if (CHECK_ZSM)
5270 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005271 cl->svals[cloff] = svNew;
5272 return;
5273 slowcase: /* misaligned, or must go further down the tree */
5274 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005275 zsm_sapply16__msmcwrite( thr, a + 0 );
5276 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005277}
5278
sewardj23f12002009-07-24 08:45:08 +00005279/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005280
sewardj23f12002009-07-24 08:45:08 +00005281static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005282 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005283 UWord cloff, tno;
5284 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005285 SVal svOld, svNew;
5286 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005287 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005288 if (UNLIKELY(!aligned64(a))) goto slowcase;
5289 cl = get_cacheline(a);
5290 cloff = get_cacheline_offset(a);
5291 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005292 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005293 descr = cl->descrs[tno];
5294 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5295 goto slowcase;
5296 }
5297 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005298 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005299 if (CHECK_ZSM)
5300 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005301 cl->svals[cloff] = svNew;
5302 return;
5303 slowcase: /* misaligned, or must go further down the tree */
5304 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005305 zsm_sapply32__msmcread( thr, a + 0 );
5306 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005307}
5308
sewardj23f12002009-07-24 08:45:08 +00005309static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005310 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005311 UWord cloff, tno;
5312 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005313 SVal svOld, svNew;
5314 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005315 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005316 if (UNLIKELY(!aligned64(a))) goto slowcase;
5317 cl = get_cacheline(a);
5318 cloff = get_cacheline_offset(a);
5319 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005320 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005321 descr = cl->descrs[tno];
5322 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5323 goto slowcase;
5324 }
5325 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005326 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005327 if (CHECK_ZSM)
5328 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005329 cl->svals[cloff] = svNew;
5330 return;
5331 slowcase: /* misaligned, or must go further down the tree */
5332 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005333 zsm_sapply32__msmcwrite( thr, a + 0 );
5334 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005335}
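
/* Worked instance of the slowcase splitting used by the sapply
   variants above: a 64-bit access at the misaligned address 0x1001
   recurses 64 -> 32 -> 16 -> 8 and is applied as eight byte-wide
   accesses at 0x1001 .. 0x1008.  Hypothetical sketch that records
   the leaf accesses instead of applying the MSM; note the real code
   can also split an *aligned* access when the tree descriptor says
   the value lives at a finer granularity: */
#if 0
static void example_split ( Addr a, SizeT szB,
                            /*OUT*/Addr* leaves, /*MOD*/UWord* n )
{
   if (szB == 1 || (a & (szB - 1)) == 0) {
      leaves[(*n)++] = a;   /* aligned (or byte-sized): apply here */
      return;
   }
   /* misaligned: split into two half-width accesses, as the
      slowcases above do */
   example_split( a,         szB/2, leaves, n );
   example_split( a + szB/2, szB/2, leaves, n );
}
#endif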
5336
sewardj23f12002009-07-24 08:45:08 +00005337/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005338
5339static
sewardj23f12002009-07-24 08:45:08 +00005340void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005341 CacheLine* cl;
5342 UWord cloff, tno, toff;
5343 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005344 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005345 cl = get_cacheline(a);
5346 cloff = get_cacheline_offset(a);
5347 tno = get_treeno(a);
5348 toff = get_tree_offset(a); /* == 0 .. 7 */
5349 descr = cl->descrs[tno];
5350 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5351 SVal* tree = &cl->svals[tno << 3];
5352 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005353 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005354 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5355 }
5356 tl_assert(svNew != SVal_INVALID);
5357 cl->svals[cloff] = svNew;
5358}
5359
sewardj23f12002009-07-24 08:45:08 +00005360/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005361
5362static
sewardj23f12002009-07-24 08:45:08 +00005363void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005364 CacheLine* cl;
5365 UWord cloff, tno, toff;
5366 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005367 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005368 if (UNLIKELY(!aligned16(a))) goto slowcase;
5369 cl = get_cacheline(a);
5370 cloff = get_cacheline_offset(a);
5371 tno = get_treeno(a);
5372 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5373 descr = cl->descrs[tno];
5374 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5375 if (valid_value_is_below_me_16(descr, toff)) {
5376 /* Writing at this level. Need to fix up 'descr'. */
5377 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5378         /* At this point, the tree does not match cl->descrs[tno] any
5379 more. The assignments below will fix it up. */
5380 } else {
5381 /* We can't indiscriminately write on the w16 node as in the
5382 w64 case, as that might make the node inconsistent with
5383 its parent. So first, pull down to this level. */
5384 SVal* tree = &cl->svals[tno << 3];
5385 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005386 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005387 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5388 }
5389 }
5390 tl_assert(svNew != SVal_INVALID);
5391 cl->svals[cloff + 0] = svNew;
5392 cl->svals[cloff + 1] = SVal_INVALID;
5393 return;
5394 slowcase: /* misaligned */
5395 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005396 zsm_swrite08( a + 0, svNew );
5397 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005398}
5399
sewardj23f12002009-07-24 08:45:08 +00005400/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005401
5402static
sewardj23f12002009-07-24 08:45:08 +00005403void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005404 CacheLine* cl;
5405 UWord cloff, tno, toff;
5406 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005407 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005408 if (UNLIKELY(!aligned32(a))) goto slowcase;
5409 cl = get_cacheline(a);
5410 cloff = get_cacheline_offset(a);
5411 tno = get_treeno(a);
5412 toff = get_tree_offset(a); /* == 0 or 4 */
5413 descr = cl->descrs[tno];
5414 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5415 if (valid_value_is_above_me_32(descr, toff)) {
5416 /* We can't indiscriminately write on the w32 node as in the
5417 w64 case, as that might make the node inconsistent with
5418 its parent. So first, pull down to this level. */
5419 SVal* tree = &cl->svals[tno << 3];
5420 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005421 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005422 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5423 } else {
5424 /* Writing at this level. Need to fix up 'descr'. */
5425 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5426         /* At this point, the tree does not match cl->descrs[tno] any
5427 more. The assignments below will fix it up. */
5428 }
5429 }
5430 tl_assert(svNew != SVal_INVALID);
5431 cl->svals[cloff + 0] = svNew;
5432 cl->svals[cloff + 1] = SVal_INVALID;
5433 cl->svals[cloff + 2] = SVal_INVALID;
5434 cl->svals[cloff + 3] = SVal_INVALID;
5435 return;
5436 slowcase: /* misaligned */
5437 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005438 zsm_swrite16( a + 0, svNew );
5439 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005440}
5441
sewardj23f12002009-07-24 08:45:08 +00005442/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005443
5444static
sewardj23f12002009-07-24 08:45:08 +00005445void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005446 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005447 UWord cloff, tno;
5448 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005449 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005450 if (UNLIKELY(!aligned64(a))) goto slowcase;
5451 cl = get_cacheline(a);
5452 cloff = get_cacheline_offset(a);
5453 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005454 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005455 cl->descrs[tno] = TREE_DESCR_64;
5456 tl_assert(svNew != SVal_INVALID);
5457 cl->svals[cloff + 0] = svNew;
5458 cl->svals[cloff + 1] = SVal_INVALID;
5459 cl->svals[cloff + 2] = SVal_INVALID;
5460 cl->svals[cloff + 3] = SVal_INVALID;
5461 cl->svals[cloff + 4] = SVal_INVALID;
5462 cl->svals[cloff + 5] = SVal_INVALID;
5463 cl->svals[cloff + 6] = SVal_INVALID;
5464 cl->svals[cloff + 7] = SVal_INVALID;
5465 return;
5466 slowcase: /* misaligned */
5467 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005468 zsm_swrite32( a + 0, svNew );
5469 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005470}
5471
sewardj23f12002009-07-24 08:45:08 +00005472/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005473
5474static
sewardj23f12002009-07-24 08:45:08 +00005475SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005476 CacheLine* cl;
5477 UWord cloff, tno, toff;
5478 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005479 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005480 cl = get_cacheline(a);
5481 cloff = get_cacheline_offset(a);
5482 tno = get_treeno(a);
5483 toff = get_tree_offset(a); /* == 0 .. 7 */
5484 descr = cl->descrs[tno];
5485 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5486 SVal* tree = &cl->svals[tno << 3];
5487 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5488 }
5489 return cl->svals[cloff];
5490}
5491
sewardj23f12002009-07-24 08:45:08 +00005492static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005493 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005494 stats__cline_scopy08s++;
5495 sv = zsm_sread08( src );
5496 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005497}
5498
5499
sewardj23f12002009-07-24 08:45:08 +00005500/* Block-copy states (needed for implementing realloc()). Note this
5501 doesn't change the filtering arrangements. The caller of
5502 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005503
sewardj23f12002009-07-24 08:45:08 +00005504static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005505{
5506 SizeT i;
5507 if (len == 0)
5508 return;
5509
5510 /* assert for non-overlappingness */
5511 tl_assert(src+len <= dst || dst+len <= src);
5512
5513 /* To be simple, just copy byte by byte. But so as not to wreck
5514 performance for later accesses to dst[0 .. len-1], normalise
5515 destination lines as we finish with them, and also normalise the
5516 line containing the first and last address. */
5517 for (i = 0; i < len; i++) {
5518 Bool normalise
5519 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5520 || i == 0 /* first in range */
5521 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005522 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005523 }
5524}
5525
5526
5527/* For setting address ranges to a given value. Has considerable
5528 sophistication so as to avoid generating large numbers of pointless
5529 cache loads/writebacks for large ranges. */
5530
5531/* Do small ranges in-cache, in the obvious way. */
5532static
sewardj23f12002009-07-24 08:45:08 +00005533void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005534{
5535 /* fast track a couple of common cases */
5536 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005537 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005538 return;
5539 }
5540 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005541 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005542 return;
5543 }
5544
5545 /* be completely general (but as efficient as possible) */
5546 if (len == 0) return;
5547
5548 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005549 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005550 a += 1;
5551 len -= 1;
5552 tl_assert(aligned16(a));
5553 }
5554 if (len == 0) return;
5555
5556 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005557 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005558 a += 2;
5559 len -= 2;
5560 tl_assert(aligned32(a));
5561 }
5562 if (len == 0) return;
5563
5564 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005565 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005566 a += 4;
5567 len -= 4;
5568 tl_assert(aligned64(a));
5569 }
5570 if (len == 0) return;
5571
5572 if (len >= 8) {
5573 tl_assert(aligned64(a));
5574 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005575 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005576 a += 8;
5577 len -= 8;
5578 }
5579 tl_assert(aligned64(a));
5580 }
5581 if (len == 0) return;
5582
5583 if (len >= 4)
5584 tl_assert(aligned32(a));
5585 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005586 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005587 a += 4;
5588 len -= 4;
5589 }
5590 if (len == 0) return;
5591
5592 if (len >= 2)
5593 tl_assert(aligned16(a));
5594 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005595 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005596 a += 2;
5597 len -= 2;
5598 }
5599 if (len == 0) return;
5600
5601 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005602 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005603 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005604 len -= 1;
5605 }
5606 tl_assert(len == 0);
5607}
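
/* A worked instance of the alignment laddering above, with
   hypothetical numbers: for a = 0x1003, len = 13 the calls issued
   are zsm_swrite08(0x1003), then (0x1004 being 32-bit aligned, the
   16-bit step is skipped) zsm_swrite32(0x1004), then
   zsm_swrite64(0x1008) -- three writes instead of thirteen.  Sketch
   that computes the same plan generically (the example_* names are
   hypothetical; the caller supplies big-enough output arrays): */
#if 0
static UWord example_ladder_plan ( Addr a, SizeT len,
                                   /*OUT*/Addr* addrs, /*OUT*/SizeT* szs )
{
   UWord n = 0;
   SizeT step;
   /* peel 1-, 2-, 4-byte chunks until 8-aligned (or len exhausted) */
   for (step = 1; step <= 4 && len > 0; step *= 2) {
      if ((a & (2*step - 1)) != 0 && len >= step) {
         addrs[n] = a; szs[n] = step; n++;
         a += step; len -= step;
      }
   }
   /* bulk of the range as aligned 8-byte chunks */
   while (len >= 8) {
      addrs[n] = a; szs[n] = 8; n++;
      a += 8; len -= 8;
   }
   /* trailing 4-, 2-, 1-byte chunks */
   for (step = 4; step >= 1 && len > 0; step /= 2) {
      if (len >= step) {
         addrs[n] = a; szs[n] = step; n++;
         a += step; len -= step;
      }
   }
   return n;   /* (0x1003,13) ==> {0x1003/1, 0x1004/4, 0x1008/8} */
}
#endif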
5608
5609
sewardj23f12002009-07-24 08:45:08 +00005610/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005611 for larger ranges, try to operate directly on the out-of-cache
5612 representation, rather than dragging lines into the cache,
5613 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005614 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005615
sewardj23f12002009-07-24 08:45:08 +00005616 Note that this doesn't change the filtering arrangements. The
5617 caller of zsm_sset_range needs to attend to that. */
5618
5619static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005620{
5621 tl_assert(svNew != SVal_INVALID);
5622 stats__cache_make_New_arange += (ULong)len;
5623
5624 if (0 && len > 500)
5625 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5626
5627 if (0) {
5628 static UWord n_New_in_cache = 0;
5629 static UWord n_New_not_in_cache = 0;
5630 /* tag is 'a' with the in-line offset masked out,
5631 eg a[31]..a[4] 0000 */
5632 Addr tag = a & ~(N_LINE_ARANGE - 1);
5633 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5634 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5635 n_New_in_cache++;
5636 } else {
5637 n_New_not_in_cache++;
5638 }
5639 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5640 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5641 n_New_in_cache, n_New_not_in_cache );
5642 }
5643
5644 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005645 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005646 } else {
5647 Addr before_start = a;
5648 Addr aligned_start = cacheline_ROUNDUP(a);
5649 Addr after_start = cacheline_ROUNDDN(a + len);
5650 UWord before_len = aligned_start - before_start;
5651 UWord aligned_len = after_start - aligned_start;
5652 UWord after_len = a + len - after_start;
5653 tl_assert(before_start <= aligned_start);
5654 tl_assert(aligned_start <= after_start);
5655 tl_assert(before_len < N_LINE_ARANGE);
5656 tl_assert(after_len < N_LINE_ARANGE);
5657 tl_assert(get_cacheline_offset(aligned_start) == 0);
5658 if (get_cacheline_offset(a) == 0) {
5659 tl_assert(before_len == 0);
5660 tl_assert(a == aligned_start);
5661 }
5662 if (get_cacheline_offset(a+len) == 0) {
5663 tl_assert(after_len == 0);
5664 tl_assert(after_start == a+len);
5665 }
5666 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005667 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005668 }
5669 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005670 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005671 }
5672 stats__cache_make_New_inZrep += (ULong)aligned_len;
5673
5674 while (1) {
5675 Addr tag;
5676 UWord wix;
5677 if (aligned_start >= after_start)
5678 break;
5679 tl_assert(get_cacheline_offset(aligned_start) == 0);
5680 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5681 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5682 if (tag == cache_shmem.tags0[wix]) {
5683 UWord i;
5684 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005685 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005686 } else {
5687 UWord i;
5688 Word zix;
5689 SecMap* sm;
5690 LineZ* lineZ;
5691 /* This line is not in the cache. Do not force it in; instead
5692 modify it in-place. */
5693 /* find the Z line to write in and rcdec it or the
5694 associated F line. */
5695 find_Z_for_writing( &sm, &zix, tag );
5696 tl_assert(sm);
5697 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5698 lineZ = &sm->linesZ[zix];
5699 lineZ->dict[0] = svNew;
5700 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5701 for (i = 0; i < N_LINE_ARANGE/4; i++)
5702 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5703 rcinc_LineZ(lineZ);
5704 }
5705 aligned_start += N_LINE_ARANGE;
5706 aligned_len -= N_LINE_ARANGE;
5707 }
5708 tl_assert(aligned_start == after_start);
5709 tl_assert(aligned_len == 0);
5710 }
5711}
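
/* A worked instance of the large-range split above, taking the line
   size N_LINE_ARANGE to be 64 bytes: a = 0x100a, len = 300 splits
   into 54 bytes before 0x1040, 192 bytes (three whole lines) in
   [0x1040, 0x1100), and 54 bytes from 0x1100 -- only the two small
   end pieces touch the cache.  Hypothetical sketch, valid when
   len >= 2 * lineSz and lineSz is a power of two: */
#if 0
static void example_range_split ( Addr a, SizeT len, SizeT lineSz,
                                  /*OUT*/UWord* beforeLen,
                                  /*OUT*/UWord* alignedLen,
                                  /*OUT*/UWord* afterLen )
{
   Addr aligned_start = (a + lineSz - 1) & ~(lineSz - 1);  /* round up */
   Addr after_start   = (a + len) & ~(lineSz - 1);         /* round down */
   *beforeLen  = aligned_start - a;
   *alignedLen = after_start - aligned_start;
   *afterLen   = a + len - after_start;
   /* (0x100a, 300, 64) ==> 54, 192, 54 */
}
#endif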
5712
5713
5714/////////////////////////////////////////////////////////
5715// //
sewardj23f12002009-07-24 08:45:08 +00005716// Front-filtering accesses //
5717// //
5718/////////////////////////////////////////////////////////
5719
5720static UWord stats__f_ac = 0;
5721static UWord stats__f_sk = 0;
5722
5723#if 0
5724# define STATS__F_SHOW \
5725 do { \
5726 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5727 VG_(printf)("filters: ac %lu sk %lu\n", \
5728 stats__f_ac, stats__f_sk); \
5729 } while (0)
5730#else
5731# define STATS__F_SHOW /* */
5732#endif
5733
void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
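
/* For clarity, here is the exact call sequence the ladder above
   produces for one concrete case.  Illustrative only, never compiled
   in; the address 0x1003 and length 30 are made-up values, and
   'example_sapplyNN_ladder' is not part of the library. */
#if 0
static void example_sapplyNN_ladder ( Thr* thr )
{
   /* equivalent to zsm_sapplyNN_f__msmcwrite( thr, 0x1003, 30 ): */
   zsm_sapply08_f__msmcwrite( thr, 0x1003 ); /* align up to 16 bits */
   zsm_sapply32_f__msmcwrite( thr, 0x1004 ); /* align up to 64 bits */
   zsm_sapply64_f__msmcwrite( thr, 0x1008 ); /* bulk, 8 bytes each  */
   zsm_sapply64_f__msmcwrite( thr, 0x1010 );
   zsm_sapply64_f__msmcwrite( thr, 0x1018 );
   zsm_sapply08_f__msmcwrite( thr, 0x1020 ); /* 1 byte remainder    */
}
#endif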

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
// Synchronisation objects                             //
//                                                     //
/////////////////////////////////////////////////////////

/* A doubly-linked list of all the SOs. */
SO* admin_SO = NULL;

static SO* SO__Alloc ( void )
{
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   /* Add to the doubly-linked list. */
   if (admin_SO) {
      tl_assert(admin_SO->admin_prev == NULL);
      admin_SO->admin_prev = so;
      so->admin_next = admin_SO;
   } else {
      so->admin_next = NULL;
   }
   so->admin_prev = NULL;
   admin_SO = so;
   /* */
   return so;
}

static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   /* Delete from the doubly-linked list. */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (so == admin_SO)
      admin_SO = so->admin_next;
   /* */
   HG_(free)( so );
}
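
/* Illustrative only, never compiled in: the admin links maintained
   above make it possible to walk every live SO, e.g. for sanity
   checking or stats.  'example_count_live_SOs' is a made-up name,
   not part of the library. */
#if 0
static UWord example_count_live_SOs ( void )
{
   UWord n  = 0;
   SO*   so = admin_SO;
   while (so) {
      tl_assert(so->magic == SO_MAGIC);
      n++;
      so = so->admin_next;
   }
   return n;
}
#endif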


/////////////////////////////////////////////////////////
//                                                     //
// Top Level API                                       //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void        (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to store a large number of these,
   // so make sure they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);

   /* because the first 1024 ThrIDs are unusable */
   tl_assert(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 3 bits to spare (see defn of
      Thr_n_RCEC). */
   tl_assert(SCALARTS_N_THRBITS <= 29);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   if (sizeof(UWord) == 8) {
      tl_assert(sizeof(Thr_n_RCEC) == 16);
   } else {
      tl_assert(sizeof(Thr_n_RCEC) == 12);
   }

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   tl_assert(sizeof(WordSetID) == 4);
   tl_assert(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state(" root", thr);
   return thr;
}
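
/* Illustrative only, never compiled in: how a tool brings the
   library up.  The callback names are made up; the real ones are
   supplied by the tool (Helgrind) at startup. */
#if 0
extern void        example_get_stacktrace ( Thr*, Addr*, UWord );
extern ExeContext* example_get_EC ( Thr* );
static Thr* example_tool_startup ( void )
{
   /* The returned Thr* represents the root thread; its initial
      clocks are the singleton VTS mapping the root thread to 1. */
   return libhb_init( example_get_stacktrace, example_get_EC );
}
#endif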


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
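
/* Illustrative only, never compiled in, with made-up clock values:
   if the parent's clocks are { P:4 } before the call, then afterwards
   the child's clocks are { P:4, C:1 } and the parent's are { P:5 }.
   Hence accesses made by parent and child after the fork point are
   unordered with respect to each other. */
#if 0
static void example_create_clocks ( Thr* parent )
{
   Thr* child = libhb_create( parent );
   /* The child's own component is exactly 1 ... */
   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   /* ... and the parent's and child's clocks have diverged. */
   tl_assert(child->viR != parent->viR);
   tl_assert(child->viW != parent->viW);
}
#endif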

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)(" cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
                   stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                   stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
      VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
                   stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );

      VG_(printf)("%s","\n");
      VG_(printf)(
         " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( " libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)(" libhb: threads live: %d exit_and_joinedwith %d"
                     " exit %d joinedwith %d\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)(" libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( " libhb: contextTab: %lu slots, %lu cur ents,"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, stats__ctxt_tab_max );
      VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

/* Receive notification that a thread has low-level exited.  The
   significance here is that we do not expect to see any more memory
   references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen once the
      thread has both ll_exited and been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we'd better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Receive notification that a thread has been joined with.  The
   significance here is that we do not expect to see any further
   references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}
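
/* Illustrative only, never compiled in: libhb_async_exit and
   libhb_joinedwith_done may arrive in either order, and a thread
   becomes "very dead" (eligible for VTS pruning) only once both
   have been seen.  'example_thread_teardown' is a made-up name. */
#if 0
static void example_thread_teardown ( Thr* thr )
{
   libhb_async_exit( thr );      /* no further memory accesses  */
   libhb_joinedwith_done( thr ); /* no further clock references */
   /* At this point VTS__declare_thread_very_dead(thr) has run,
      so thr can be pruned out of VTSs created from now on. */
}
#endif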


/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   an SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
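
/* Illustrative only, never compiled in: the canonical use of an SO,
   e.g. to model an unlock in thr1 followed by a lock in thr2.  The
   strong send/recv pair transfers thr1's entire clock to thr2, so
   every access thr1 made before the send happens-before every
   access thr2 makes after the recv.  'example_so_handover' is a
   made-up name. */
#if 0
static void example_so_handover ( Thr* thr1, Thr* thr2 )
{
   SO* so = libhb_so_alloc();
   libhb_so_send( thr1, so, True/*strong_send*/ ); /* e.g. unlock */
   libhb_so_recv( thr2, so, True/*strong_recv*/ ); /* e.g. lock   */
   libhb_so_dealloc( so );
}
#endif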

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   if (UNLIKELY(oldrefTreeN >= HG_(clo_conflict_cache_size)))
      event_map_GC();

   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END main library                                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/