/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Debugging #defines                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                      data decls: VtsID                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                      data decls: SVal                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef ULong SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

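/* Added illustration (not part of the original code): the top two bits
   of an SVal act as a discriminator, so a hypothetical helper like the
   one below could distinguish the special values from ordinary ones. */
#if 0
static inline UWord sval_toptag ( SVal s ) {
   return (UWord)(s >> 62);  /* 3 for SVal_INVALID, 2 for SVal_NOACCESS */
}
#endif
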
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                    data decls: ScalarTS                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.06e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)


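/* Added illustration (not in the original): the bitfields above are
   intended to pack into a single 64-bit word.  A compile-time check
   along these lines would document that assumption. */
#if 0
typedef char ScalarTS_is_8_bytes[ sizeof(ScalarTS) == 8 ? 1 : -1 ];
#endif
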
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: Filter                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

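/* A hedged sketch (added, not part of the original code): how the
   layout just described could be queried.  'fi_seen_R'/'fi_seen_W'
   are hypothetical helpers, shown only to make the bit-pair scheme
   concrete. */
#if 0
static inline Bool fi_seen_R ( UShort u16, UWord byteoff /* 0 .. 7 */ ) {
   return 0 != (u16 & (2 << (2 * byteoff)));  /* R is the higher bit */
}
static inline Bool fi_seen_W ( UShort u16, UWord byteoff /* 0 .. 7 */ ) {
   return 0 != (u16 & (1 << (2 * byteoff)));  /* W is the lower bit */
}
#endif
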
/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                 data decls: Thr, ULong_n_EC                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in the array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                       data decls: SO                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Forward declarations                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* fwds for
   Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool SVal__isC ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );

/* A double linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//           SECTION BEGIN compressed shadow memory            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


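/* A minimal usage sketch (added; 'my_rcinc'/'my_rcdec' are hypothetical
   caller-side counters, not part of this library).  Per the comment
   above, the observed counts only become exact after zsm_flush_cache. */
#if 0
static ULong my_refs = 0;
static void my_rcinc ( SVal s ) { my_refs++; }
static void my_rcdec ( SVal s ) { my_refs--; }

static void example_exact_counting ( void ) {
   zsm_init( my_rcinc, my_rcdec );
   /* ... zsm_sset_range / zsm_scopy_range calls ... */
   zsm_flush_cache();  /* only now is my_refs exact */
}
#endif
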
/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   ((a + N - 1) & ~(N-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~(N-1))
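/* Worked example (added): ROUNDUP(13, 8) == 16 and ROUNDDN(13, 8) == 8.
   Both rely on ~(N-1) being a contiguous mask, hence the power-of-2
   requirement on N. */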


/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0   (1<<0)
#define TREE_DESCR_32_0   (1<<1)
#define TREE_DESCR_16_1   (1<<2)
#define TREE_DESCR_64     (1<<3)
#define TREE_DESCR_16_2   (1<<4)
#define TREE_DESCR_32_1   (1<<5)
#define TREE_DESCR_16_3   (1<<6)
#define TREE_DESCR_8_0    (1<<7)
#define TREE_DESCR_8_1    (1<<8)
#define TREE_DESCR_8_2    (1<<9)
#define TREE_DESCR_8_3    (1<<10)
#define TREE_DESCR_8_4    (1<<11)
#define TREE_DESCR_8_5    (1<<12)
#define TREE_DESCR_8_6    (1<<13)
#define TREE_DESCR_8_7    (1<<14)
#define TREE_DESCR_DTY    (1<<15)

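/* Added note: a tree of eight independent 8-bit leaves has descriptor
   TREE_DESCR_8_7 | ... | TREE_DESCR_8_0 == 0x7F80, while a single
   64-bit leaf is just TREE_DESCR_64 == 0x0008; normalise_tree below
   shows how wider leaves replace narrower ones. */
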
typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

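/* Added illustration: how an address decomposes under the defaults
   (N_SECMAP_BITS == 13, N_LINE_BITS == 6).  This is only a sketch of
   the arithmetic the shmem__* helpers below perform. */
#if 0
static void example_addr_decomposition ( Addr a ) {
   Addr  smBase = a & ~(Addr)(N_SECMAP_ARANGE - 1); /* 8KB SecMap base */
   UWord smOff  = a & (N_SECMAP_ARANGE - 1);        /* offset in SecMap */
   UWord zix    = smOff >> N_LINE_BITS;             /* LineZ index, 0 .. 127 */
   UWord cloff  = a & (N_LINE_ARANGE - 1);          /* offset within the line */
   (void)smBase; (void)zix; (void)cloff;
}
#endif
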
/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
     * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
     * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}

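/* Worked example (added): write_twobit_array(arr, 5, 3) targets
   arr[1] (bix = 5 >> 2 == 1) with shift 2 (2 * (5 & 3) == 2), so it
   sets bits 3:2; read_twobit_array(arr, 5) recovers those two bits. */
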
/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, we rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0)  \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
                   /* INVALID - any valid descr produces at least one
                      valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

1568static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1569 stats__cline_64to32pulldown++;
1570 switch (toff) {
1571 case 0: case 4:
1572 tl_assert(descr & TREE_DESCR_64);
1573 tree[4] = tree[0];
1574 descr &= ~TREE_DESCR_64;
1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1576 break;
1577 default:
1578 tl_assert(0);
1579 }
1580 return descr;
1581}
1582
1583static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1584 stats__cline_32to16pulldown++;
1585 switch (toff) {
1586 case 0: case 2:
1587 if (!(descr & TREE_DESCR_32_0)) {
1588 descr = pulldown_to_32(tree, 0, descr);
1589 }
1590 tl_assert(descr & TREE_DESCR_32_0);
1591 tree[2] = tree[0];
1592 descr &= ~TREE_DESCR_32_0;
1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1594 break;
1595 case 4: case 6:
1596 if (!(descr & TREE_DESCR_32_1)) {
1597 descr = pulldown_to_32(tree, 4, descr);
1598 }
1599 tl_assert(descr & TREE_DESCR_32_1);
1600 tree[6] = tree[4];
1601 descr &= ~TREE_DESCR_32_1;
1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1603 break;
1604 default:
1605 tl_assert(0);
1606 }
1607 return descr;
1608}
1609
1610static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1611 stats__cline_16to8pulldown++;
1612 switch (toff) {
1613 case 0: case 1:
1614 if (!(descr & TREE_DESCR_16_0)) {
1615 descr = pulldown_to_16(tree, 0, descr);
1616 }
1617 tl_assert(descr & TREE_DESCR_16_0);
1618 tree[1] = tree[0];
1619 descr &= ~TREE_DESCR_16_0;
1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1621 break;
1622 case 2: case 3:
1623 if (!(descr & TREE_DESCR_16_1)) {
1624 descr = pulldown_to_16(tree, 2, descr);
1625 }
1626 tl_assert(descr & TREE_DESCR_16_1);
1627 tree[3] = tree[2];
1628 descr &= ~TREE_DESCR_16_1;
1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1630 break;
1631 case 4: case 5:
1632 if (!(descr & TREE_DESCR_16_2)) {
1633 descr = pulldown_to_16(tree, 4, descr);
1634 }
1635 tl_assert(descr & TREE_DESCR_16_2);
1636 tree[5] = tree[4];
1637 descr &= ~TREE_DESCR_16_2;
1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1639 break;
1640 case 6: case 7:
1641 if (!(descr & TREE_DESCR_16_3)) {
1642 descr = pulldown_to_16(tree, 6, descr);
1643 }
1644 tl_assert(descr & TREE_DESCR_16_3);
1645 tree[7] = tree[6];
1646 descr &= ~TREE_DESCR_16_3;
1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1648 break;
1649 default:
1650 tl_assert(0);
1651 }
1652 return descr;
1653}
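
/* Worked example (illustrative only, not part of the original
   source).  Suppose an 8-byte tree currently holds one 64-bit value,
   so descr has just TREE_DESCR_64 set, and we want byte 3
   (toff == 3).  Then pulldown_to_8(tree, 3, descr) proceeds as:

     - TREE_DESCR_16_1 is clear, so it calls pulldown_to_16(tree, 2, ..)
     - there, TREE_DESCR_32_0 is clear, so it calls
       pulldown_to_32(tree, 0, ..), which copies tree[0] to tree[4]
       and splits _64 into _32_1|_32_0
     - back in pulldown_to_16, tree[0] is copied to tree[2] and _32_0
       is split into _16_1|_16_0
     - finally tree[3] = tree[2] and _16_1 is split into _8_3|_8_2.

   Net effect: the single 64-bit leaf has been progressively split so
   that tree[3] is now an independently-writable 8-bit leaf. */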


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
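
/* Worked example (illustrative only, not part of the original
   source).  Pullup is the inverse direction: if all eight bytes of a
   tree are individual 8-bit leaves (descr == _8_7|..|_8_0), then

      descr = pullup_descr_to_32(descr, 0);
      descr = pullup_descr_to_32(descr, 4);

   first merges _8_1|_8_0 and _8_3|_8_2 into _16_0 and _16_1, then
   merges those into _32_0, and likewise for the upper half, leaving
   descr == TREE_DESCR_32_1 | TREE_DESCR_32_0.  Note that only the
   descriptor is recomputed; unlike pulldown, no SVals are copied. */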

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}
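
/* Illustrative only, not part of the original source: zsm_init is
   called exactly once, with the client supplying reference-counting
   callbacks for the SVals that will be stored in shadow memory.  A
   minimal sketch, with hypothetical callback names: */
#if 0
static void my_sval_rcinc ( SVal s ) { /* bump the refcount behind s */ }
static void my_sval_rcdec ( SVal s ) { /* drop the refcount behind s */ }

static void example_startup ( void )
{
   zsm_init( my_sval_rcinc, my_sval_rcdec );
}
#endif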
1768
1769/////////////////////////////////////////////////////////////////
1770/////////////////////////////////////////////////////////////////
1771// //
1772// SECTION END compressed shadow memory //
1773// //
1774/////////////////////////////////////////////////////////////////
1775/////////////////////////////////////////////////////////////////
1776
1777
1778
1779/////////////////////////////////////////////////////////////////
1780/////////////////////////////////////////////////////////////////
1781// //
1782// SECTION BEGIN vts primitives //
1783// //
1784/////////////////////////////////////////////////////////////////
1785/////////////////////////////////////////////////////////////////
1786
sewardjf98e1c02008-10-25 16:22:41 +00001787
sewardje4cce742011-02-24 15:25:24 +00001788/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1789 being compact stand-ins for Thr*'s. Use these functions to map
1790 between them. */
1791static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1792static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1793
sewardje4cce742011-02-24 15:25:24 +00001794__attribute__((noreturn))
1795static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1796{
1797 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00001798 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001799 "\n"
1800 "Helgrind: cannot continue, run aborted: too many threads.\n"
1801 "Sorry. Helgrind can only handle programs that create\n"
1802 "%'llu or fewer threads over their entire lifetime.\n"
1803 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001804 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001805 } else {
florian6bf37262012-10-21 03:23:36 +00001806 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001807 "\n"
1808 "Helgrind: cannot continue, run aborted: too many\n"
1809 "synchronisation events. Sorry. Helgrind can only handle\n"
1810 "programs which perform %'llu or fewer\n"
1811 "inter-thread synchronisation events (locks, unlocks, etc).\n"
1812 "\n";
1813 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1814 }
1815 VG_(exit)(1);
1816 /*NOTREACHED*/
1817 tl_assert(0); /*wtf?!*/
1818}
1819
1820
philippec3508652015-03-28 12:01:58 +00001821/* The dead thread (ThrID, actually) tables. A thread may only be
sewardjffce8152011-06-24 10:09:41 +00001822 listed here if we have been notified thereof by libhb_async_exit.
1823 New entries are added at the end. The order isn't important, but
philippec3508652015-03-28 12:01:58 +00001824 the ThrID values must be unique.
1825 verydead_thread_table_not_pruned lists the identity of the threads
1826 that died since the previous round of pruning.
1827 Once pruning is done, these ThrID are added in verydead_thread_table.
1828 We don't actually need to keep the set of threads that have ever died --
sewardjffce8152011-06-24 10:09:41 +00001829 only the threads that have died since the previous round of
1830 pruning. But it's useful for sanity check purposes to keep the
1831 entire set, so we do. */
philippec3508652015-03-28 12:01:58 +00001832static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
sewardjffce8152011-06-24 10:09:41 +00001833static XArray* /* of ThrID */ verydead_thread_table = NULL;
1834
1835/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00001836static Int cmp__ThrID ( const void* v1, const void* v2 ) {
1837 ThrID id1 = *(const ThrID*)v1;
1838 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00001839 if (id1 < id2) return -1;
1840 if (id1 > id2) return 1;
1841 return 0;
1842}
1843
philippec3508652015-03-28 12:01:58 +00001844static void verydead_thread_tables_init ( void )
sewardjffce8152011-06-24 10:09:41 +00001845{
1846 tl_assert(!verydead_thread_table);
philippec3508652015-03-28 12:01:58 +00001847 tl_assert(!verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00001848 verydead_thread_table
1849 = VG_(newXA)( HG_(zalloc),
1850 "libhb.verydead_thread_table_init.1",
1851 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00001852 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
philippec3508652015-03-28 12:01:58 +00001853 verydead_thread_table_not_pruned
1854 = VG_(newXA)( HG_(zalloc),
1855 "libhb.verydead_thread_table_init.2",
1856 HG_(free), sizeof(ThrID) );
1857 VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
sewardjffce8152011-06-24 10:09:41 +00001858}
1859
philippec3508652015-03-28 12:01:58 +00001860static void verydead_thread_table_sort_and_check (XArray* thrids)
1861{
1862 UWord i;
1863
1864 VG_(sortXA)( thrids );
1865 /* Sanity check: check for unique .sts.thr values. */
1866 UWord nBT = VG_(sizeXA)( thrids );
1867 if (nBT > 0) {
1868 ThrID thrid1, thrid2;
1869 thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
1870 for (i = 1; i < nBT; i++) {
1871 thrid1 = thrid2;
1872 thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
1873 tl_assert(thrid1 < thrid2);
1874 }
1875 }
1876 /* Ok, so the dead thread table thrids has unique and in-order keys. */
1877}
sewardjf98e1c02008-10-25 16:22:41 +00001878
1879/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1880 a backlink for the caller's convenience. Since we have no idea
1881 what to set that to in the library, it always gets set to
1882 VtsID_INVALID. */
1883typedef
1884 struct {
sewardj7aa38a92011-02-27 23:04:12 +00001885 VtsID id;
1886 UInt usedTS;
1887 UInt sizeTS;
1888 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00001889 }
1890 VTS;
1891
sewardj7aa38a92011-02-27 23:04:12 +00001892/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00001893static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00001894
sewardjffce8152011-06-24 10:09:41 +00001895/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00001896 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00001897static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001898
sewardjffce8152011-06-24 10:09:41 +00001899/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
1900 array is sized exactly to hold the number of required elements.
1901 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
1902 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00001903static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00001904
sewardjf98e1c02008-10-25 16:22:41 +00001905/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00001906static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001907
sewardj7aa38a92011-02-27 23:04:12 +00001908/* Create a new singleton VTS in 'out'. Caller must have
1909 pre-allocated 'out' sufficiently big to hold the result in all
1910 possible cases. */
1911static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00001912
sewardj7aa38a92011-02-27 23:04:12 +00001913/* Create in 'out' a VTS which is the same as 'vts' except with
1914 vts[me]++, so to speak. Caller must have pre-allocated 'out'
1915 sufficiently big to hold the result in all possible cases. */
1916static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001917
sewardj7aa38a92011-02-27 23:04:12 +00001918/* Create in 'out' a VTS which is the join (max) of 'a' and
1919 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
1920 the result in all possible cases. */
1921static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001922
sewardj23f12002009-07-24 08:45:08 +00001923/* Compute the partial ordering relation of the two args. Although we
1924 could be completely general and return an enumeration value (EQ,
1925 LT, GT, UN), in fact we only need LEQ, and so we may as well
1926 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00001927
sewardje4cce742011-02-24 15:25:24 +00001928 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
1929 invald ThrID). In the latter case, the returned ThrID indicates
1930 the discovered point for which they are not. There may be more
1931 than one such point, but we only care about seeing one of them, not
1932 all of them. This rather strange convention is used because
1933 sometimes we want to know the actual index at which they first
1934 differ. */
1935static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001936
1937/* Compute an arbitrary structural (total) ordering on the two args,
1938 based on their VCs, so they can be looked up in a table, tree, etc.
1939 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00001940static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001941
florianb28fe892014-10-28 20:52:07 +00001942/* Debugging only. Display the given VTS. */
1943static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001944
1945/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00001946static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00001947
sewardjffce8152011-06-24 10:09:41 +00001948/* Notify the VTS machinery that a thread has been declared
1949 comprehensively dead: that is, it has done an async exit AND it has
1950 been joined with. This should ensure that its local clocks (.viR
1951 and .viW) will never again change, and so all mentions of this
1952 thread from all VTSs in the system may be removed. */
1953static void VTS__declare_thread_very_dead ( Thr* idx );
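
/* Worked example (illustrative only, not part of the original
   source).  A VTS is a sparse vector clock: .ts holds (thrid, tym)
   pairs sorted by strictly increasing thrid, and any thread not
   mentioned implicitly has timestamp zero.  So the abstract clock
   {T1:3, T4:7} is stored as

      usedTS == 2,  ts == [ {thrid:T1, tym:3}, {thrid:T4, tym:7} ]

   and VTS__indexAt_SLOW on any other thread returns 0.  tick, join
   and cmpLEQ below are all merge-style walks over these sorted
   arrays. */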

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining
          entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
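
/* Worked example (illustrative only, not part of the original
   source).  Ticking thread T4 in the clock {T1:3, T4:7, T9:2}
   exercises case (c): entries before T4 are copied (T1:3), T4's
   timestamp is bumped to 8, and the rest are copied, giving
   {T1:3, T4:8, T9:2}.  Ticking T2 in the same clock exercises case
   (b) and yields {T1:3, T2:1, T4:7, T9:2} -- one entry longer, as
   the final assertion (usedTS == vts->usedTS + (found ? 0 : 1))
   requires. */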


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
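
/* Worked example (illustrative only, not part of the original
   source).  Joining a = {T1:3, T4:7} with b = {T4:2, T9:5} walks the
   two sorted arrays in step:

      T1: tyma=3, tymb=0 (implicit)   -> max 3
      T4: tyma=7, tymb=2, ncommon++   -> max 7
      T9: tyma=0 (implicit), tymb=5   -> max 5

   giving out = {T1:3, T4:7, T9:5}, with usedTS == 2 + 2 - 1 as the
   final assertion demands. */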


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
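
/* Worked example (illustrative only, not part of the original
   source).  With a = {T1:3, T4:7} and b = {T1:5, T4:7, T9:1}, every
   per-thread comparison satisfies tyma <= tymb, so VTS__cmpLEQ
   returns 0: a happens-before-or-equals b.  With the arguments
   swapped, the walk hits T1 with tyma=5 > tymb=3 and returns T1's
   ThrID, identifying a witness thread for the non-LEQ-ness. */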


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
   performance critical so there is some effort expended to make it
   as fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across
      likely differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
   Word i, n;
   tl_assert(vts);

   VG_(printf)("[");
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      const ScalarTS *st = &vts->ts[i];
      VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
   }
   VG_(printf)("]");
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ: tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
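
/* Illustrative only, not part of the original source: vts_set
   hash-conses VTSs, so structurally equal clocks end up sharing one
   heap object.  A sketch of the calling pattern, assuming 'cand' is
   a freshly built VTS whose id is still VtsID_INVALID: */
#if 0
static VTS* example_intern ( VTS* cand )
{
   VTS* canonical = NULL;
   Bool was_present = vts_set__find__or__clone_and_add( &canonical, cand );
   /* 'cand' itself is never stored: if !was_present, a clone of it
      was added to vts_set.  Either way 'canonical' is the shared
      copy, and 'cand' remains owned by (and disposable by) the
      caller. */
   return canonical;
}
#endif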


/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for u.freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so u.freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      union {
         VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
         VtsID remap;    /* used only during pruning, for used entries */
      } u;
      /* u.freelink only used when vts == NULL,
         u.remap only used when vts != NULL, during pruning. */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                         HG_(free), sizeof(VtsTE) );
   vts_tab_freelist = VtsID_INVALID;
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->u.freelink == VtsID_INVALID);
   ie->u.freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->u.freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts = NULL;
   te.rc = 0;
   te.u.freelink = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}
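
/* Worked example (illustrative only, not part of the original
   source).  The freelist is threaded through the entries themselves.
   If entries 7 and then 3 are freed, then

      free(7): tab[7].u.freelink = VtsID_INVALID;  freelist = 7
      free(3): tab[3].u.freelink = 7;              freelist = 3

   and a subsequent get_new_VtsID() pops 3, leaving freelist == 7,
   before ever having to grow vts_tab. */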


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID  ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->u.freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}


static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->u.remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}


/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->u.freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;
   /* No need to do pruning if no thread died since the last pruning,
      as no VtsTE can be pruned. */
   if (VG_(sizeXA)( verydead_thread_table_not_pruned) == 0)
      return;
sewardjffce8152011-06-24 10:09:41 +00002855
2856 /* ---------- BEGIN VTS PRUNING ---------- */
philippec3508652015-03-28 12:01:58 +00002857 /* Sort and check the very dead threads that died since the last pruning.
2858 Sorting is used for the check and so that we can quickly look
sewardjffce8152011-06-24 10:09:41 +00002859 up the dead-thread entries as we work through the VTSs. */
philippec3508652015-03-28 12:01:58 +00002860 verydead_thread_table_sort_and_check (verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002861
2862 /* We will run through the old table, and create a new table and
philippea1ac2f42015-05-01 17:12:00 +00002863 set, at the same time setting the u.remap entries in the old
sewardjffce8152011-06-24 10:09:41 +00002864 table to point to the new entries. Then, visit every VtsID in
2865 the system, and replace all of them with new ones, using the
philippea1ac2f42015-05-01 17:12:00 +00002866 u.remap entries in the old table. Finally, we can delete the old
sewardjffce8152011-06-24 10:09:41 +00002867 table and set. */
2868
2869 XArray* /* of VtsTE */ new_tab
2870 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2871 HG_(free), sizeof(VtsTE) );
2872
2873 /* WordFM VTS* void */
2874 WordFM* new_set
2875 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2876 HG_(free),
2877 (Word(*)(UWord,UWord))VTS__cmp_structural );
2878
2879 /* Visit each old VTS. For each one:
2880
2881 * make a pruned version
2882
2883 * search new_set for the pruned version, yielding either
2884 Nothing (not present) or the new VtsID for it.
2885
2886 * if not present, allocate a new VtsID for it, insert (pruned
2887 VTS, new VtsID) in the tree, and set
2888 remap_table[old VtsID] = new VtsID.
2889
2890 * if present, set remap_table[old VtsID] = new VtsID, where
2891 new VtsID was determined by the tree lookup. Then free up
2892 the clone.
2893 */
2894
2895 UWord nBeforePruning = 0, nAfterPruning = 0;
2896 UWord nSTSsBefore = 0, nSTSsAfter = 0;
2897 VtsID new_VtsID_ctr = 0;
2898
2899 for (i = 0; i < nTab; i++) {
2900
2901 /* For each old VTS .. */
2902 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
2903 VTS* old_vts = old_te->vts;
sewardjffce8152011-06-24 10:09:41 +00002904
2905 /* Skip it if not in use */
2906 if (old_te->rc == 0) {
2907 tl_assert(old_vts == NULL);
2908 continue;
2909 }
philippea1ac2f42015-05-01 17:12:00 +00002910 tl_assert(old_te->u.remap == VtsID_INVALID);
sewardjffce8152011-06-24 10:09:41 +00002911 tl_assert(old_vts != NULL);
2912 tl_assert(old_vts->id == i);
2913 tl_assert(old_vts->ts != NULL);
2914
2915 /* It is in use. Make a pruned version. */
2916 nBeforePruning++;
2917 nSTSsBefore += old_vts->usedTS;
2918 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
philippec3508652015-03-28 12:01:58 +00002919 old_vts, verydead_thread_table_not_pruned);
sewardjffce8152011-06-24 10:09:41 +00002920 tl_assert(new_vts->sizeTS == new_vts->usedTS);
2921 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2922 == 0x0ddC0ffeeBadF00dULL);
2923
2924 /* Get rid of the old VTS and the tree entry. It's a bit more
2925 complex to incrementally delete the VTSs now than to nuke
2926 them all after we're done, but the upside is that we don't
2927 wind up temporarily storing potentially two complete copies
2928 of each VTS and hence spiking memory use. */
2929 UWord oldK = 0, oldV = 12345;
2930 Bool present = VG_(delFromFM)( vts_set,
2931 &oldK, &oldV, (UWord)old_vts );
2932 tl_assert(present); /* else it isn't in vts_set ?! */
2933 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2934 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2935 /* now free the VTS itself */
2936 VTS__delete(old_vts);
2937 old_te->vts = NULL;
2938 old_vts = NULL;
2939
2940 /* NO MENTIONS of old_vts allowed beyond this point. */
2941
2942 /* Ok, we have the pruned copy in new_vts. See if a
2943 structurally identical version is already present in new_set.
2944 If so, delete the one we just made and move on; if not, add
2945 it. */
2946 VTS* identical_version = NULL;
2947 UWord valW = 12345;
2948 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2949 (UWord)new_vts)) {
2950 // already have it
2951 tl_assert(valW == 0);
2952 tl_assert(identical_version != NULL);
2953 tl_assert(identical_version != new_vts);
2954 VTS__delete(new_vts);
2955 new_vts = identical_version;
2956 tl_assert(new_vts->id != VtsID_INVALID);
2957 } else {
2958 tl_assert(valW == 12345);
2959 tl_assert(identical_version == NULL);
2960 new_vts->id = new_VtsID_ctr++;
2961 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2962 tl_assert(!b);
2963 VtsTE new_te;
2964 new_te.vts = new_vts;
2965 new_te.rc = 0;
philippea1ac2f42015-05-01 17:12:00 +00002966 new_te.u.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00002967 Word j = VG_(addToXA)( new_tab, &new_te );
2968 tl_assert(j <= i);
2969 tl_assert(j == new_VtsID_ctr - 1);
2970 // stats
2971 nAfterPruning++;
2972 nSTSsAfter += new_vts->usedTS;
2973 }
philippea1ac2f42015-05-01 17:12:00 +00002974 old_te->u.remap = new_vts->id;
sewardjffce8152011-06-24 10:09:41 +00002975
2976 } /* for (i = 0; i < nTab; i++) */
2977
philippec3508652015-03-28 12:01:58 +00002978 /* Move very dead thread from verydead_thread_table_not_pruned to
2979 verydead_thread_table. Sort and check verydead_thread_table
2980 to verify a thread was reported very dead only once. */
2981 {
2982 UWord nBT = VG_(sizeXA)( verydead_thread_table_not_pruned);
2983
2984 for (i = 0; i < nBT; i++) {
2985 ThrID thrid =
2986 *(ThrID*)VG_(indexXA)( verydead_thread_table_not_pruned, i );
2987 VG_(addToXA)( verydead_thread_table, &thrid );
2988 }
2989 verydead_thread_table_sort_and_check (verydead_thread_table);
2990 VG_(dropHeadXA) (verydead_thread_table_not_pruned, nBT);
2991 }
2992
sewardjffce8152011-06-24 10:09:41 +00002993 /* At this point, we have:
philippea1ac2f42015-05-01 17:12:00 +00002994 * the old VTS table, with its u.remap entries set,
sewardjffce8152011-06-24 10:09:41 +00002995 and with all .vts == NULL.
2996 * the old VTS tree should be empty, since it and the old VTSs
2997 it contained have been incrementally deleted as we worked
2998 through the old table.
philippea1ac2f42015-05-01 17:12:00 +00002999 * the new VTS table, with all .rc == 0, all u.freelink and u.remap
sewardjffce8152011-06-24 10:09:41 +00003000 == VtsID_INVALID.
3001 * the new VTS tree.
3002 */
3003 tl_assert( VG_(sizeFM)(vts_set) == 0 );
3004
3005 /* Now actually apply the mapping. */
3006 /* Visit all the VtsIDs in the entire system. Where do we expect
3007 to find them?
3008 (a) in shadow memory -- the LineZs and LineFs
3009 (b) in our collection of struct _Thrs.
3010 (c) in our collection of struct _SOs.
3011 Nowhere else, AFAICS. Not in the zsm cache, because that just
3012 got invalidated.
3013
philippea1ac2f42015-05-01 17:12:00 +00003014 Using the u.remap fields in vts_tab, map each old VtsID to a new
sewardjffce8152011-06-24 10:09:41 +00003015 VtsID. For each old VtsID, dec its rc; and for each new one,
3016 inc it. This sets up the new refcounts, and it also gives a
3017 cheap sanity check of the old ones: all old refcounts should be
3018 zero after this operation.
3019 */
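   /* Schematically, each individual remapping step has this shape (a
      sketch only; the real remap_VtsID is defined earlier in this
      file and additionally performs the rc-- / rc++ bookkeeping
      described above):

         VtsID  old    = *pID;
         VtsTE* old_te = VG_(indexXA)( vts_tab, old );
         *pID          = old_te->u.remap; // new ID, set up by the loop above
   */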
3020
3021 /* Do the mappings for (a) above: iterate over the Primary shadow
3022 mem map (WordFM Addr SecMap*). */
3023 UWord secmapW = 0;
3024 VG_(initIterFM)( map_shmem );
3025 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3026 UWord j;
3027 SecMap* sm = (SecMap*)secmapW;
3028 tl_assert(sm->magic == SecMap_MAGIC);
3029 /* Deal with the LineZs */
3030 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3031 LineZ* lineZ = &sm->linesZ[i];
3032 if (lineZ->dict[0] == SVal_INVALID)
3033 continue; /* not in use -- data is in F rep instead */
3034 for (j = 0; j < 4; j++)
3035 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3036 }
3037 /* Deal with the LineFs */
3038 for (i = 0; i < sm->linesF_size; i++) {
3039 LineF* lineF = &sm->linesF[i];
3040 if (!lineF->inUse)
3041 continue;
3042 for (j = 0; j < N_LINE_ARANGE; j++)
3043 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3044 }
3045 }
3046 VG_(doneIterFM)( map_shmem );
3047
3048 /* Do the mappings for (b) above: visit our collection of struct
3049 _Thrs. */
3050 Thread* hgthread = get_admin_threads();
3051 tl_assert(hgthread);
3052 while (hgthread) {
3053 Thr* hbthr = hgthread->hbthr;
3054 tl_assert(hbthr);
3055 /* Threads that are listed in the prunable set have their viR
3056 and viW set to VtsID_INVALID, so we can't mess with them. */
3057 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3058 tl_assert(hbthr->viR == VtsID_INVALID);
3059 tl_assert(hbthr->viW == VtsID_INVALID);
3060 hgthread = hgthread->admin;
3061 continue;
3062 }
3063 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3064 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3065 hgthread = hgthread->admin;
3066 }
3067
3068 /* Do the mappings for (c) above: visit the struct _SOs. */
3069 SO* so = admin_SO;
3070 while (so) {
3071 if (so->viR != VtsID_INVALID)
3072 remap_VtsID( vts_tab, new_tab, &so->viR );
3073 if (so->viW != VtsID_INVALID)
3074 remap_VtsID( vts_tab, new_tab, &so->viW );
3075 so = so->admin_next;
3076 }
3077
3078 /* So, we're nearly done (with this incredibly complex operation).
3079 Check that the refcounts for the old VtsIDs all fell to zero, as
3080 expected. Any failure is serious. */
3081 for (i = 0; i < nTab; i++) {
3082 VtsTE* te = VG_(indexXA)( vts_tab, i );
3083 tl_assert(te->vts == NULL);
3084 /* This is the assert proper. Note we're also asserting
philippea1ac2f42015-05-01 17:12:00 +00003085 zeroness for old entries which are unmapped. That's OK. */
sewardjffce8152011-06-24 10:09:41 +00003086 tl_assert(te->rc == 0);
3087 }
3088
3089 /* Install the new table and set. */
3090 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3091 vts_set = new_set;
3092 VG_(deleteXA)( vts_tab );
3093 vts_tab = new_tab;
3094
3095 /* The freelist of vts_tab entries is empty now, because we've
3096 compacted all of the live entries at the low end of the
3097 table. */
3098 vts_tab_freelist = VtsID_INVALID;
3099
3100 /* Sanity check vts_set and vts_tab. */
3101
3102 /* Because all the live entries got slid down to the bottom of vts_tab: */
3103 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3104
3105 /* Assert that the vts_tab and vts_set entries point at each other
3106 in the required way */
3107 UWord wordK = 0, wordV = 0;
3108 VG_(initIterFM)( vts_set );
3109 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3110 tl_assert(wordK != 0);
3111 tl_assert(wordV == 0);
3112 VTS* vts = (VTS*)wordK;
3113 tl_assert(vts->id != VtsID_INVALID);
3114 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3115 tl_assert(te->vts == vts);
3116 }
3117 VG_(doneIterFM)( vts_set );
3118
3119 /* Also iterate over the table, and check each entry is
3120 plausible. */
3121 nTab = VG_(sizeXA)( vts_tab );
3122 for (i = 0; i < nTab; i++) {
3123 VtsTE* te = VG_(indexXA)( vts_tab, i );
3124 tl_assert(te->vts);
3125 tl_assert(te->vts->id == i);
3126 tl_assert(te->rc > 0); /* 'cos we just GC'd */
philippea1ac2f42015-05-01 17:12:00 +00003127 tl_assert(te->u.freelink == VtsID_INVALID); /* in use */
3128 /* value of te->u.remap not relevant */
sewardjffce8152011-06-24 10:09:41 +00003129 }
3130
3131 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3132 if (VG_(clo_stats)) {
3133 static UInt ctr = 1;
3134 tl_assert(nTab > 0);
3135 VG_(message)(
3136 Vg_DebugMsg,
3137 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3138 "after %lu (avg sz %lu)\n",
3139 ctr++,
3140 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3141 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3142 );
3143 }
sewardjffce8152011-06-24 10:09:41 +00003144 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003145}
3146
3147
3148/////////////////////////////////////////////////////////
3149// //
3150// Vts IDs //
3151// //
3152/////////////////////////////////////////////////////////
3153
3154//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003155/* A single, max-sized VTS which is used as scratch space (the first
3156 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3157static VTS* temp_max_sized_VTS = NULL;
3158
3159//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003160static ULong stats__cmpLEQ_queries = 0;
3161static ULong stats__cmpLEQ_misses = 0;
3162static ULong stats__join2_queries = 0;
3163static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003164
3165static inline UInt ROL32 ( UInt w, Int n ) {
3166 w = (w << n) | (w >> (32-n));
3167 return w;
3168}
3169static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3170 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3171 return hash % nTab;
3172}
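/* Worked example, purely illustrative: for vi1 = 1, vi2 = 2 and
   nTab = 1023, ROL32(1,19) = 0x80000 and ROL32(2,13) = 0x4000, so
   the combined hash is 0x84000 = 540672, and 540672 % 1023 = 528.
   That (vi1,vi2) pair therefore probes slot 528 of the cache. */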
3173
sewardj23f12002009-07-24 08:45:08 +00003174#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003175static
sewardj23f12002009-07-24 08:45:08 +00003176 struct { VtsID vi1; VtsID vi2; Bool leq; }
3177 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003178
3179#define N_JOIN2_CACHE 1023
3180static
3181 struct { VtsID vi1; VtsID vi2; VtsID res; }
3182 join2_cache[N_JOIN2_CACHE];
3183
3184static void VtsID__invalidate_caches ( void ) {
3185 Int i;
sewardj23f12002009-07-24 08:45:08 +00003186 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3187 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3188 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3189 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003190 }
3191 for (i = 0; i < N_JOIN2_CACHE; i++) {
3192 join2_cache[i].vi1 = VtsID_INVALID;
3193 join2_cache[i].vi2 = VtsID_INVALID;
3194 join2_cache[i].res = VtsID_INVALID;
3195 }
3196}
3197//////////////////////////
3198
sewardjd52392d2008-11-08 20:36:26 +00003199//static Bool VtsID__is_valid ( VtsID vi ) {
3200// VtsTE* ve;
3201// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3202// return False;
3203// ve = VG_(indexXA)( vts_tab, vi );
3204// if (!ve->vts)
3205// return False;
3206// tl_assert(ve->vts->id == vi);
3207// return True;
3208//}
sewardjf98e1c02008-10-25 16:22:41 +00003209
3210static VTS* VtsID__to_VTS ( VtsID vi ) {
3211 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3212 tl_assert(te->vts);
3213 return te->vts;
3214}
3215
3216static void VtsID__pp ( VtsID vi ) {
sewardjf98e1c02008-10-25 16:22:41 +00003217 VTS* vts = VtsID__to_VTS(vi);
florianb28fe892014-10-28 20:52:07 +00003218 VTS__show( vts );
sewardjf98e1c02008-10-25 16:22:41 +00003219}
3220
3221/* compute partial ordering relation of vi1 and vi2. */
3222__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003223static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003224 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003225 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003226 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003227 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003228 tl_assert(vi1 != vi2);
3229 ////++
sewardj23f12002009-07-24 08:45:08 +00003230 stats__cmpLEQ_queries++;
3231 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3232 if (cmpLEQ_cache[hash].vi1 == vi1
3233 && cmpLEQ_cache[hash].vi2 == vi2)
3234 return cmpLEQ_cache[hash].leq;
3235 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003236 ////--
3237 v1 = VtsID__to_VTS(vi1);
3238 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003239 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003240 ////++
sewardj23f12002009-07-24 08:45:08 +00003241 cmpLEQ_cache[hash].vi1 = vi1;
3242 cmpLEQ_cache[hash].vi2 = vi2;
3243 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003244 ////--
sewardj23f12002009-07-24 08:45:08 +00003245 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003246}
sewardj23f12002009-07-24 08:45:08 +00003247static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3248 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003249}
3250
3251/* compute binary join */
3252__attribute__((noinline))
3253static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3254 UInt hash;
3255 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003256 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003257 //if (vi1 == vi2) return vi1;
3258 tl_assert(vi1 != vi2);
3259 ////++
3260 stats__join2_queries++;
3261 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3262 if (join2_cache[hash].vi1 == vi1
3263 && join2_cache[hash].vi2 == vi2)
3264 return join2_cache[hash].res;
3265 stats__join2_misses++;
3266 ////--
3267 vts1 = VtsID__to_VTS(vi1);
3268 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003269 temp_max_sized_VTS->usedTS = 0;
3270 VTS__join(temp_max_sized_VTS, vts1,vts2);
3271 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003272 ////++
3273 join2_cache[hash].vi1 = vi1;
3274 join2_cache[hash].vi2 = vi2;
3275 join2_cache[hash].res = res;
3276 ////--
3277 return res;
3278}
3279static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003280 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003281}
3282
3283/* create a singleton VTS, namely [thr:1] */
3284static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003285 temp_max_sized_VTS->usedTS = 0;
3286 VTS__singleton(temp_max_sized_VTS, thr,tym);
3287 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003288}
3289
3290/* tick operation, creates value 1 if specified index is absent */
3291static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3292 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003293 temp_max_sized_VTS->usedTS = 0;
3294 VTS__tick(temp_max_sized_VTS, idx,vts);
3295 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003296}
3297
3298/* index into a VTS (only for assertions) */
3299static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3300 VTS* vts = VtsID__to_VTS(vi);
3301 return VTS__indexAt_SLOW( vts, idx );
3302}
3303
sewardj23f12002009-07-24 08:45:08 +00003304/* Assuming that !cmpLEQ(vi1, vi2), find the first (or any, really)
3305 element in vi1 which is pointwise greater than the corresponding
3306 element in vi2, and return the Thr* owning that element. If no
3307 such element exists, return NULL. This needs to be fairly quick
3308 since it is called every time a race is detected. */
3309static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3310{
3311 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003312 Thr* diffthr;
3313 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003314 tl_assert(vi1 != vi2);
3315 vts1 = VtsID__to_VTS(vi1);
3316 vts2 = VtsID__to_VTS(vi2);
3317 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003318 diffthrid = VTS__cmpLEQ(vts1, vts2);
3319 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003320 tl_assert(diffthr); /* else they are LEQ ! */
3321 return diffthr;
3322}
3323
3324
3325/////////////////////////////////////////////////////////
3326// //
3327// Filters //
3328// //
3329/////////////////////////////////////////////////////////
3330
sewardj23f12002009-07-24 08:45:08 +00003331/* Forget everything we know -- clear the filter and let everything
3332 through. This needs to be as fast as possible, since it is called
3333 every time the running thread changes, and every time a thread's
3334 vector clocks change, which can be quite frequent. The obvious
3335 fast way to do this is simply to stuff in tags which we know are
3336 not going to match anything, since they're not aligned to the start
3337 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003338static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003339{
3340 UWord i;
3341 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3342 for (i = 0; i < FI_NUM_LINES; i += 8) {
3343 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3344 fi->tags[i+1] = 1;
3345 fi->tags[i+2] = 1;
3346 fi->tags[i+3] = 1;
3347 fi->tags[i+4] = 1;
3348 fi->tags[i+5] = 1;
3349 fi->tags[i+6] = 1;
3350 fi->tags[i+7] = 1;
3351 }
3352 tl_assert(i == FI_NUM_LINES);
3353}
3354
3355/* Clearing an arbitrary range in the filter. Unfortunately
3356 we have to do this due to core-supplied new/die-mem events. */
3357
3358static void Filter__clear_1byte ( Filter* fi, Addr a )
3359{
3360 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3361 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3362 FiLine* line = &fi->lines[lineno];
3363 UWord loff = (a - atag) / 8;
3364 UShort mask = 0x3 << (2 * (a & 7));
3365 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3366 if (LIKELY( fi->tags[lineno] == atag )) {
3367 /* hit. clear the bits. */
3368 UShort u16 = line->u16s[loff];
3369 line->u16s[loff] = u16 & ~mask; /* clear them */
3370 } else {
3371 /* miss. The filter doesn't hold this address, so ignore. */
3372 }
3373}
3374
3375static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3376{
3377 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3378 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3379 FiLine* line = &fi->lines[lineno];
3380 UWord loff = (a - atag) / 8;
3381 if (LIKELY( fi->tags[lineno] == atag )) {
3382 line->u16s[loff] = 0;
3383 } else {
3384 /* miss. The filter doesn't hold this address, so ignore. */
3385 }
3386}
3387
3388static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3389{
3390 //VG_(printf)("%lu ", len);
3391 /* slowly do part preceding 8-alignment */
3392 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3393 Filter__clear_1byte( fi, a );
3394 a++;
3395 len--;
3396 }
3397 /* vector loop */
3398 while (len >= 8) {
3399 Filter__clear_8bytes_aligned( fi, a );
3400 a += 8;
3401 len -= 8;
3402 }
3403 /* slowly do tail */
3404 while (UNLIKELY(len > 0)) {
3405 Filter__clear_1byte( fi, a );
3406 a++;
3407 len--;
3408 }
3409}
3410
3411
3412/* ------ Read handlers for the filter. ------ */
3413
3414static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3415{
3416 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3417 return False;
3418 {
3419 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3420 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3421 FiLine* line = &fi->lines[lineno];
3422 UWord loff = (a - atag) / 8;
3423 UShort mask = 0xAAAA;
3424 if (LIKELY( fi->tags[lineno] == atag )) {
3425 /* hit. check line and update. */
3426 UShort u16 = line->u16s[loff];
3427 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3428 line->u16s[loff] = u16 | mask; /* set them */
3429 return ok;
3430 } else {
3431 /* miss. nuke existing line and re-use it. */
3432 UWord i;
3433 fi->tags[lineno] = atag;
3434 for (i = 0; i < FI_LINE_SZB / 8; i++)
3435 line->u16s[i] = 0;
3436 line->u16s[loff] = mask;
3437 return False;
3438 }
3439 }
3440}
3441
3442static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3443{
3444 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3445 return False;
3446 {
3447 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3448 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3449 FiLine* line = &fi->lines[lineno];
3450 UWord loff = (a - atag) / 8;
3451 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3452 if (LIKELY( fi->tags[lineno] == atag )) {
3453 /* hit. check line and update. */
3454 UShort u16 = line->u16s[loff];
3455 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3456 line->u16s[loff] = u16 | mask; /* set them */
3457 return ok;
3458 } else {
3459 /* miss. nuke existing line and re-use it. */
3460 UWord i;
3461 fi->tags[lineno] = atag;
3462 for (i = 0; i < FI_LINE_SZB / 8; i++)
3463 line->u16s[i] = 0;
3464 line->u16s[loff] = mask;
3465 return False;
3466 }
3467 }
3468}
3469
3470static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3471{
3472 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3473 return False;
3474 {
3475 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3476 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3477 FiLine* line = &fi->lines[lineno];
3478 UWord loff = (a - atag) / 8;
3479 UShort mask = 0xA << (2 * (a & 6));
3480 /* mask is A000, 0A00, 00A0 or 000A */
3481 if (LIKELY( fi->tags[lineno] == atag )) {
3482 /* hit. check line and update. */
3483 UShort u16 = line->u16s[loff];
3484 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3485 line->u16s[loff] = u16 | mask; /* set them */
3486 return ok;
3487 } else {
3488 /* miss. nuke existing line and re-use it. */
3489 UWord i;
3490 fi->tags[lineno] = atag;
3491 for (i = 0; i < FI_LINE_SZB / 8; i++)
3492 line->u16s[i] = 0;
3493 line->u16s[loff] = mask;
3494 return False;
3495 }
3496 }
3497}
3498
3499static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3500{
3501 {
3502 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3503 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3504 FiLine* line = &fi->lines[lineno];
3505 UWord loff = (a - atag) / 8;
3506 UShort mask = 0x2 << (2 * (a & 7));
3507 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3508 if (LIKELY( fi->tags[lineno] == atag )) {
3509 /* hit. check line and update. */
3510 UShort u16 = line->u16s[loff];
3511 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3512 line->u16s[loff] = u16 | mask; /* set them */
3513 return ok;
3514 } else {
3515 /* miss. nuke existing line and re-use it. */
3516 UWord i;
3517 fi->tags[lineno] = atag;
3518 for (i = 0; i < FI_LINE_SZB / 8; i++)
3519 line->u16s[i] = 0;
3520 line->u16s[loff] = mask;
3521 return False;
3522 }
3523 }
3524}
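/* Layout note, inferred from the masks above: each UShort in
   FiLine.u16s covers 8 bytes, with two filter bits per byte -- the
   W bit at bit position 2*(a & 7) and the R bit just above it.
   Hence 0xAAAA selects all eight R bits, and in the write handlers
   below 0xFFFF selects all sixteen R and W bits. */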
3525
3526
3527/* ------ Write handlers for the filter. ------ */
3528
3529static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3530{
3531 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3532 return False;
3533 {
3534 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3535 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3536 FiLine* line = &fi->lines[lineno];
3537 UWord loff = (a - atag) / 8;
3538 UShort mask = 0xFFFF;
3539 if (LIKELY( fi->tags[lineno] == atag )) {
3540 /* hit. check line and update. */
3541 UShort u16 = line->u16s[loff];
3542 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3543 line->u16s[loff] = u16 | mask; /* set them */
3544 return ok;
3545 } else {
3546 /* miss. nuke existing line and re-use it. */
3547 UWord i;
3548 fi->tags[lineno] = atag;
3549 for (i = 0; i < FI_LINE_SZB / 8; i++)
3550 line->u16s[i] = 0;
3551 line->u16s[loff] = mask;
3552 return False;
3553 }
3554 }
3555}
3556
3557static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3558{
3559 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3560 return False;
3561 {
3562 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3563 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3564 FiLine* line = &fi->lines[lineno];
3565 UWord loff = (a - atag) / 8;
3566 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3567 if (LIKELY( fi->tags[lineno] == atag )) {
3568 /* hit. check line and update. */
3569 UShort u16 = line->u16s[loff];
3570 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3571 line->u16s[loff] = u16 | mask; /* set them */
3572 return ok;
3573 } else {
3574 /* miss. nuke existing line and re-use it. */
3575 UWord i;
3576 fi->tags[lineno] = atag;
3577 for (i = 0; i < FI_LINE_SZB / 8; i++)
3578 line->u16s[i] = 0;
3579 line->u16s[loff] = mask;
3580 return False;
3581 }
3582 }
3583}
3584
3585static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3586{
3587 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3588 return False;
3589 {
3590 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3591 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3592 FiLine* line = &fi->lines[lineno];
3593 UWord loff = (a - atag) / 8;
3594 UShort mask = 0xF << (2 * (a & 6));
3595 /* mask is F000, 0F00, 00F0 or 000F */
3596 if (LIKELY( fi->tags[lineno] == atag )) {
3597 /* hit. check line and update. */
3598 UShort u16 = line->u16s[loff];
3599 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3600 line->u16s[loff] = u16 | mask; /* set them */
3601 return ok;
3602 } else {
3603 /* miss. nuke existing line and re-use it. */
3604 UWord i;
3605 fi->tags[lineno] = atag;
3606 for (i = 0; i < FI_LINE_SZB / 8; i++)
3607 line->u16s[i] = 0;
3608 line->u16s[loff] = mask;
3609 return False;
3610 }
3611 }
3612}
3613
3614static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3615{
3616 {
3617 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3618 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3619 FiLine* line = &fi->lines[lineno];
3620 UWord loff = (a - atag) / 8;
3621 UShort mask = 0x3 << (2 * (a & 7));
3622 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3623 if (LIKELY( fi->tags[lineno] == atag )) {
3624 /* hit. check line and update. */
3625 UShort u16 = line->u16s[loff];
3626          Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3627 line->u16s[loff] = u16 | mask; /* set them */
3628 return ok;
3629 } else {
3630 /* miss. nuke existing line and re-use it. */
3631 UWord i;
3632 fi->tags[lineno] = atag;
3633 for (i = 0; i < FI_LINE_SZB / 8; i++)
3634 line->u16s[i] = 0;
3635 line->u16s[loff] = mask;
3636 return False;
3637 }
3638 }
3639}
3640
sewardjf98e1c02008-10-25 16:22:41 +00003641
3642/////////////////////////////////////////////////////////
3643// //
3644// Threads //
3645// //
3646/////////////////////////////////////////////////////////
3647
sewardje4cce742011-02-24 15:25:24 +00003648/* Maps ThrID values to their Thr*s (which contain ThrID values that
3649 should point back to the relevant slot in the array). Lowest
3650 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3651static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3652
3653/* And a counter to dole out ThrID values. For rationale/background,
3654 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003655static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003656
3657static ThrID Thr__to_ThrID ( Thr* thr ) {
3658 return thr->thrid;
3659}
3660static Thr* Thr__from_ThrID ( UInt thrid ) {
3661 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3662 tl_assert(thr->thrid == thrid);
3663 return thr;
3664}
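/* Example: the first Thr ever created gets thrid 1024 and lives in
   slot 0 of thrid_to_thr_map, the second gets 1025 in slot 1, and
   so on -- hence the 'thrid - 1024' indexing above and the
   'ix + 1024 == thr->thrid' cross-check in Thr__new below. */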
3665
3666static Thr* Thr__new ( void )
3667{
sewardjf98e1c02008-10-25 16:22:41 +00003668 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3669 thr->viR = VtsID_INVALID;
3670 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003671 thr->llexit_done = False;
3672 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00003673 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
philippeca903bb2014-04-26 22:50:08 +00003674 if (HG_(clo_history_level) == 1)
3675 thr->local_Kws_n_stacks
3676 = VG_(newXA)( HG_(zalloc),
3677 "libhb.Thr__new.3 (local_Kws_and_stacks)",
3678 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00003679
3680 /* Add this Thr* <-> ThrID binding to the mapping, and
3681 cross-check */
3682 if (!thrid_to_thr_map) {
3683 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3684 HG_(free), sizeof(Thr*) );
sewardje4cce742011-02-24 15:25:24 +00003685 }
3686
sewardj7aa38a92011-02-27 23:04:12 +00003687 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00003688 /* We're hosed. We have to stop. */
3689 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3690 }
3691
3692 thr->thrid = thrid_counter++;
3693 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3694 tl_assert(ix + 1024 == thr->thrid);
3695
sewardjf98e1c02008-10-25 16:22:41 +00003696 return thr;
3697}
3698
sewardj8ab2c132009-08-02 09:34:35 +00003699static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00003700{
3701 Word nPresent;
3702 ULong_n_EC pair;
3703 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00003704
3705 // We only collect this info at history level 1 (approx)
3706 if (HG_(clo_history_level) != 1)
3707 return;
3708
sewardj8ab2c132009-08-02 09:34:35 +00003709 /* This is the scalar Kw for thr. */
3710 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00003711 pair.ec = main_get_EC( thr );
3712 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00003713 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00003714
3715 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00003716 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00003717
3718 /* Throw away old stacks, if necessary. We can't accumulate stuff
3719 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00003720 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3721 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3722 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3723 if (0)
3724 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00003725 thr, pair.ull, pair.ec );
3726 }
3727
3728 if (nPresent > 0) {
3729 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00003730 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3731 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00003732 }
3733
3734 if (nPresent == 0)
3735 pair.ec = NULL;
3736
sewardj8ab2c132009-08-02 09:34:35 +00003737 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00003738
3739 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00003740 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00003741 thr, pair.ull, pair.ec );
3742 if (0)
3743 VG_(pp_ExeContext)(pair.ec);
3744}
3745
florian6bd9dc12012-11-23 16:17:43 +00003746static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
3747 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00003748{
3749 if (pair1->ull < pair2->ull) return -1;
3750 if (pair1->ull > pair2->ull) return 1;
3751 return 0;
3752}
3753
sewardjf98e1c02008-10-25 16:22:41 +00003754
3755/////////////////////////////////////////////////////////
3756// //
3757// Shadow Values //
3758// //
3759/////////////////////////////////////////////////////////
3760
3761// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3762// hb_zsm.h. We have to do everything else here.
3763
3764/* SVal is 64 bit unsigned int.
3765
3766 <---------30---------> <---------30--------->
3767 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003768 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003769 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
3770
sewardjf98e1c02008-10-25 16:22:41 +00003771*/
3772#define SVAL_TAGMASK (3ULL << 62)
3773
3774static inline Bool SVal__isC ( SVal s ) {
3775 return (0ULL << 62) == (s & SVAL_TAGMASK);
3776}
3777static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3778 //tl_assert(VtsID__is_valid(rmini));
3779 //tl_assert(VtsID__is_valid(wmini));
3780 return (((ULong)rmini) << 32) | ((ULong)wmini);
3781}
3782static inline VtsID SVal__unC_Rmin ( SVal s ) {
3783 tl_assert(SVal__isC(s));
3784 return (VtsID)(s >> 32);
3785}
3786static inline VtsID SVal__unC_Wmin ( SVal s ) {
3787 tl_assert(SVal__isC(s));
3788 return (VtsID)(s & 0xFFFFFFFFULL);
3789}
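/* Worked example, purely illustrative: SVal__mkC(0x123, 0x456)
   == 0x0000012300000456ULL. The top two bits are 00, so SVal__isC
   returns True, and SVal__unC_Rmin / SVal__unC_Wmin recover 0x123
   and 0x456 respectively. */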
3790
sewardj23f12002009-07-24 08:45:08 +00003791static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003792 return (2ULL << 62) == (s & SVAL_TAGMASK);
3793}
sewardj5aa09bf2014-06-20 14:25:53 +00003794__attribute__((unused))
sewardj23f12002009-07-24 08:45:08 +00003795static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003796 return 2ULL << 62;
3797}
3798
3799/* Direct callback from lib_zsm. */
3800static void SVal__rcinc ( SVal s ) {
3801 if (SVal__isC(s)) {
3802 VtsID__rcinc( SVal__unC_Rmin(s) );
3803 VtsID__rcinc( SVal__unC_Wmin(s) );
3804 }
3805}
3806
3807/* Direct callback from lib_zsm. */
3808static void SVal__rcdec ( SVal s ) {
3809 if (SVal__isC(s)) {
3810 VtsID__rcdec( SVal__unC_Rmin(s) );
3811 VtsID__rcdec( SVal__unC_Wmin(s) );
3812 }
3813}
3814
3815
3816/////////////////////////////////////////////////////////
3817// //
3818// Change-event map2 //
3819// //
3820/////////////////////////////////////////////////////////
3821
sewardjf98e1c02008-10-25 16:22:41 +00003822/* This is in two parts:
3823
sewardj23f12002009-07-24 08:45:08 +00003824 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003825 traces. When the reference count of a stack trace becomes zero,
3826 it is removed from the set and freed up. The intent is to have
3827 a set of stack traces which can be referred to from (2), but to
3828 only represent each one once. The set is indexed/searched by
3829 ordering on the stack trace vectors.
3830
sewardj849b0ed2008-12-21 10:43:10 +00003831 2. A SparseWA of OldRefs. These store information about each old
3832 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003833 location for which the information is recorded. For LRU
philippecabdbb52015-04-20 21:33:16 +00003834 purposes, each OldRef in the SparseWA is also on a doubly
3835 linked list maintaining the order in which the OldRefs were most
3836 recently accessed.
sewardjf98e1c02008-10-25 16:22:41 +00003837
3838 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003839 This is an array of N_OLDREF_ACCS entries which binds (thread, R/W,
3840 size) triples to RCECs. This allows us to collect the last
3841 access-traceback by up to N_OLDREF_ACCS different triples for
3842 this location. The accs[] array is a MTF-array. If a binding
3843 falls off the end, that's too bad -- we will lose info about
3844 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003845
philippecabdbb52015-04-20 21:33:16 +00003846 We allocate a maximum of VG_(clo_conflict_cache_size) OldRef.
3847 Then we do exact LRU discarding. For each discarded OldRef we must
sewardjf98e1c02008-10-25 16:22:41 +00003848 of course decrement the reference count on the all RCECs it
3849 refers to, in order that entries from (1) eventually get
3850 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003851
3852 A major improvement in reliability of this mechanism would be to
3853 have a dynamically sized OldRef.accs[] array, so no entries ever
3854 fall off the end. In investigations (Dec 08) it appears that a
3855 major cause for the non-availability of conflicting-access traces
3856 in race reports is the fixed size of this array. I
3857 suspect for most OldRefs, only a few entries are used, but for a
3858 minority of cases there is an overflow, leading to info lossage.
3859 Investigations also suggest this is very workload and scheduling
3860 sensitive. Therefore a dynamic sizing would be better.
3861
philippe6643e962012-01-17 21:16:30 +00003862 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00003863 for OldRef structures. And that's important for performance. So
3864 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003865*/
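/* A minimal sketch of the one-step "move towards the front"
   discipline mentioned above, shown on a plain Int array (demo code
   only, not used anywhere: the real thing operates on Thr_n_RCEC
   slots in event_map_bind, and the same idea is applied to the RCEC
   hash chains by move_RCEC_one_step_forward, both below). A hit at
   index hit_ix swaps entries hit_ix and hit_ix-1, so frequently
   accessed entries migrate towards slot 0 over repeated hits. */
#if 0
static void demo_bump_one_step ( Int* xs, Word hit_ix )
{
   if (hit_ix > 0) {
      Int tmp        = xs[hit_ix - 1];
      xs[hit_ix - 1] = xs[hit_ix];
      xs[hit_ix]     = tmp;
   }
}
#endif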
3866
3867
3868static UWord stats__ctxt_rcdec1 = 0;
3869static UWord stats__ctxt_rcdec2 = 0;
3870static UWord stats__ctxt_rcdec3 = 0;
3871static UWord stats__ctxt_rcdec_calls = 0;
3872static UWord stats__ctxt_rcdec_discards = 0;
3873static UWord stats__ctxt_rcdec1_eq = 0;
3874
3875static UWord stats__ctxt_tab_curr = 0;
3876static UWord stats__ctxt_tab_max = 0;
3877
3878static UWord stats__ctxt_tab_qs = 0;
3879static UWord stats__ctxt_tab_cmps = 0;
3880
3881
3882///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00003883//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00003884///
3885
3886#define N_FRAMES 8
3887
3888// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3889#define RCEC_MAGIC 0xab88abb2UL
3890
3891//#define N_RCEC_TAB 98317 /* prime */
3892#define N_RCEC_TAB 196613 /* prime */
3893
3894typedef
3895 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003896 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003897 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003898 UWord rc;
3899 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003900 UWord frames_hash; /* hash of all the frames */
3901 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003902 }
3903 RCEC;
3904
philippecabdbb52015-04-20 21:33:16 +00003905//////////// BEGIN RCEC pool allocator
3906static PoolAlloc* rcec_pool_allocator;
3907static RCEC* alloc_RCEC ( void ) {
3908 return VG_(allocEltPA) ( rcec_pool_allocator );
3909}
3910
3911static void free_RCEC ( RCEC* rcec ) {
3912 tl_assert(rcec->magic == RCEC_MAGIC);
3913 VG_(freeEltPA)( rcec_pool_allocator, rcec );
3914}
3915//////////// END RCEC pool allocator
3916
sewardjf98e1c02008-10-25 16:22:41 +00003917static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3918
philippecabdbb52015-04-20 21:33:16 +00003919/* Count of allocated RCEC having ref count > 0 */
3920static UWord RCEC_referenced = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003921
3922/* Gives an arbitrary total order on RCEC .frames fields */
3923static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3924 Word i;
3925 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3926 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003927 if (ec1->frames_hash < ec2->frames_hash) return -1;
3928 if (ec1->frames_hash > ec2->frames_hash) return 1;
3929 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003930 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003931 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003932 }
3933 return 0;
3934}
3935
3936
3937/* Dec the ref of this RCEC. */
3938static void ctxt__rcdec ( RCEC* ec )
3939{
3940 stats__ctxt_rcdec_calls++;
3941 tl_assert(ec && ec->magic == RCEC_MAGIC);
3942 tl_assert(ec->rc > 0);
3943 ec->rc--;
philippecabdbb52015-04-20 21:33:16 +00003944 if (ec->rc == 0)
3945 RCEC_referenced--;
sewardjf98e1c02008-10-25 16:22:41 +00003946}
3947
3948static void ctxt__rcinc ( RCEC* ec )
3949{
3950 tl_assert(ec && ec->magic == RCEC_MAGIC);
philippecabdbb52015-04-20 21:33:16 +00003951 if (ec->rc == 0)
3952 RCEC_referenced++;
sewardjf98e1c02008-10-25 16:22:41 +00003953 ec->rc++;
3954}
3955
3956
3957/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3958 move it one step closer to the front of the list, so as to make
3959 subsequent searches for it cheaper. */
3960static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3961{
3962 RCEC *ec0, *ec1, *ec2;
3963 if (ec == *headp)
3964 tl_assert(0); /* already at head of list */
3965 tl_assert(ec != NULL);
3966 ec0 = *headp;
3967 ec1 = NULL;
3968 ec2 = NULL;
3969 while (True) {
3970 if (ec0 == NULL || ec0 == ec) break;
3971 ec2 = ec1;
3972 ec1 = ec0;
3973 ec0 = ec0->next;
3974 }
3975 tl_assert(ec0 == ec);
3976 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3977 RCEC* tmp;
3978 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3979 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3980 closer to the start of the list. */
3981 tl_assert(ec2->next == ec1);
3982 tl_assert(ec1->next == ec0);
3983 tmp = ec0->next;
3984 ec2->next = ec0;
3985 ec0->next = ec1;
3986 ec1->next = tmp;
3987 }
3988 else
3989 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3990 /* it's second in the list. */
3991 tl_assert(*headp == ec1);
3992 tl_assert(ec1->next == ec0);
3993 ec1->next = ec0->next;
3994 ec0->next = ec1;
3995 *headp = ec0;
3996 }
3997}
3998
3999
4000/* Find the given RCEC in the hash table, and return a pointer to it.
4001 Or, if not present, add the given one to the table (by making a copy
4002 of it, so the caller can immediately deallocate the original) and
4003 return a pointer to the copy. The caller can safely have 'example'
4004 on its stack, since we will always return a pointer to a copy of
4005 it, not to the original. Note that the inserted node will have .rc
4006 of zero and so the caller must immediately increment it. */
4007__attribute__((noinline))
4008static RCEC* ctxt__find_or_add ( RCEC* example )
4009{
4010 UWord hent;
4011 RCEC* copy;
4012 tl_assert(example && example->magic == RCEC_MAGIC);
4013 tl_assert(example->rc == 0);
4014
4015 /* Search the hash table to see if we already have it. */
4016 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004017 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004018 copy = contextTab[hent];
4019 while (1) {
4020 if (!copy) break;
4021 tl_assert(copy->magic == RCEC_MAGIC);
4022 stats__ctxt_tab_cmps++;
4023 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4024 copy = copy->next;
4025 }
4026
4027 if (copy) {
4028 tl_assert(copy != example);
4029 /* optimisation: if it's not at the head of its list, move 1
4030 step fwds, to make future searches cheaper */
4031 if (copy != contextTab[hent]) {
4032 move_RCEC_one_step_forward( &contextTab[hent], copy );
4033 }
4034 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004035 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004036 tl_assert(copy != example);
4037 *copy = *example;
4038 copy->next = contextTab[hent];
4039 contextTab[hent] = copy;
4040 stats__ctxt_tab_curr++;
4041 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4042 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4043 }
4044 return copy;
4045}
4046
4047static inline UWord ROLW ( UWord w, Int n )
4048{
4049 Int bpw = 8 * sizeof(UWord);
4050 w = (w << n) | (w >> (bpw-n));
4051 return w;
4052}
4053
4054__attribute__((noinline))
4055static RCEC* get_RCEC ( Thr* thr )
4056{
4057 UWord hash, i;
4058 RCEC example;
4059 example.magic = RCEC_MAGIC;
4060 example.rc = 0;
4061 example.rcX = 0;
florian195623b2013-01-22 00:25:05 +00004062 example.next = NULL;
njn6c83d5e2009-05-05 23:46:24 +00004063 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004064 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004065 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004066 hash ^= example.frames[i];
4067 hash = ROLW(hash, 19);
4068 }
njn6c83d5e2009-05-05 23:46:24 +00004069 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004070 return ctxt__find_or_add( &example );
4071}
4072
4073///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004074//// Part (2):
4075/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004076///
4077
sewardjffce8152011-06-24 10:09:41 +00004078/* Records an access: a thread, a context (RCEC), the set of W-held
4079 locks, and the access size & writeness. The size (1,2,4,8) is
4080 encoded in szLg2B as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004081*/
sewardjffce8152011-06-24 10:09:41 +00004082typedef
4083 struct {
4084 RCEC* rcec;
4085 WordSetID locksHeldW;
4086 UInt thrid : SCALARTS_N_THRBITS;
4087 UInt szLg2B : 2;
4088 UInt isW : 1;
4089 }
4090 Thr_n_RCEC;
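/* Illustrative equivalence (the real code prefers an explicit
   switch in event_map_bind below): for szB in {1,2,4,8}, szLg2B is
   just log2(szB), e.g.

      UInt szLg2B = szB==8 ? 3 : szB==4 ? 2 : szB==2 ? 1 : 0;

   and the size is recovered as '1 << szLg2B', as done in
   libhb_event_map_lookup. */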
sewardjf98e1c02008-10-25 16:22:41 +00004091
sewardj849b0ed2008-12-21 10:43:10 +00004092#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004093
4094typedef
philippecabdbb52015-04-20 21:33:16 +00004095 struct OldRef {
4096 struct OldRef *prev; // to refs older than this one
4097 struct OldRef *next; // to refs newer that this one
4098 Addr ga; // Address for which we record up to N_OLDREF_ACCS accesses.
sewardjffce8152011-06-24 10:09:41 +00004099 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004100 Thr_n_RCEC accs[N_OLDREF_ACCS];
4101 }
4102 OldRef;
philippecabdbb52015-04-20 21:33:16 +00004103/* We need ga in OldRef in order to remove OldRef from the sparsewa
4104 by key (i.e. ga) when re-using the lru OldRef. */
sewardjd86e3a22008-12-03 11:39:37 +00004105
philippe6643e962012-01-17 21:16:30 +00004106//////////// BEGIN OldRef pool allocator
4107static PoolAlloc* oldref_pool_allocator;
philippecabdbb52015-04-20 21:33:16 +00004108// Note: we only allocate elements in this pool allocator; we never free them.
4109// We stop allocating elements at HG_(clo_conflict_cache_size).
philippe6643e962012-01-17 21:16:30 +00004110//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004111
philippecabdbb52015-04-20 21:33:16 +00004112static OldRef mru;
4113static OldRef lru;
4114// A doubly linked list, chaining all OldRefs in mru/lru order.
4115// mru/lru are sentinel nodes.
4116// Whenever an OldRef is accessed, it is moved to the most-recently-used
4117// position (i.e. pointed to by mru.prev).
4118// When a new OldRef is needed, it is allocated from the pool
4119// if we have not yet reached --conflict-cache-size.
4120// Otherwise, once all OldRefs have been allocated, the least recently
4121// used one (i.e. pointed to by lru.next) is unchained from its current
4122// position and re-used.
4124
4125// Removes r from the double linked list
4126// Note: we do not need to test for special cases such as
4127// NULL next or prev pointers, because we have sentinel nodes
4128// at both sides of the list. So, a node is always forward and
4129// backward linked.
4130static inline void OldRef_unchain(OldRef *r)
4131{
4132 r->next->prev = r->prev;
4133 r->prev->next = r->next;
4134}
4135
4136// Insert new as the newest OldRef
4137// Similarly to OldRef_unchain, no need to test for NULL
4138// pointers, as e.g. mru.prev is always guaranteed to point
4139// to a non NULL node (lru when the list is empty).
4140static inline void OldRef_newest(OldRef *new)
4141{
4142 new->next = &mru;
4143 new->prev = mru.prev;
4144 mru.prev = new;
4145 new->prev->next = new;
4146}
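/* Usage sketch: to mark an existing OldRef 'ref' as most recently
   used, it is unchained and rechained at the mru end, exactly as
   event_map_bind does below:

      OldRef_unchain(ref);   // remove from its current position
      OldRef_newest(ref);    // re-insert next to the mru sentinel

   When the cache is full, the eviction victim is always lru.next,
   the least recently used entry (see alloc_or_reuse_OldRef). */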
sewardjd86e3a22008-12-03 11:39:37 +00004147
sewardjbc307e52008-12-06 22:10:54 +00004148static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
sewardjbc307e52008-12-06 22:10:54 +00004149static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
philippecabdbb52015-04-20 21:33:16 +00004150/* Note: the nr of ref in the oldrefTree will always be equal to
4151 the nr of elements that were allocated from the OldRef pool allocator
4152 as we never free an OldRef : we just re-use them. */
4153
4154
4155/* allocates a new OldRef or re-use the lru one if all allowed OldRef
4156 have already been allocated. */
4157static OldRef* alloc_or_reuse_OldRef ( void )
4158{
4159 if (oldrefTreeN < HG_(clo_conflict_cache_size)) {
4160 oldrefTreeN++;
4161 return VG_(allocEltPA) ( oldref_pool_allocator );
4162 } else {
4163 Bool b;
4164 UWord valW;
4165 OldRef *oldref = lru.next;
4166
4167 OldRef_unchain(oldref);
4168 b = VG_(delFromSWA)( oldrefTree, &valW, oldref->ga );
4169 tl_assert(b);
4170 tl_assert (oldref == (OldRef*)valW);
4171
4172 for (UInt i = 0; i < N_OLDREF_ACCS; i++) {
4173 ThrID aThrID = oldref->accs[i].thrid;
4174 RCEC* aRef = oldref->accs[i].rcec;
4175 if (aRef) {
4176 tl_assert(aThrID != 0);
4177 stats__ctxt_rcdec3++;
4178 ctxt__rcdec( aRef );
4179 } else {
4180 tl_assert(aThrID == 0);
4181 }
4182 }
4183 return oldref;
4184 }
4185}
4186
sewardjf98e1c02008-10-25 16:22:41 +00004187
sewardj1669cc72008-12-13 01:20:21 +00004188inline static UInt min_UInt ( UInt a, UInt b ) {
4189 return a < b ? a : b;
4190}
4191
sewardja781be62008-12-08 00:12:28 +00004192/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4193 first interval is lower, 1 if the first interval is higher, and 0
4194 if there is any overlap. Redundant paranoia with casting is there
4195 following what looked distinctly like a bug in gcc-4.1.2, in which
4196 some of the comparisons were done signedly instead of
4197 unsignedly. */
4198/* Copied from exp-ptrcheck/sg_main.c */
4199static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4200 Addr a2, SizeT n2 ) {
4201 UWord a1w = (UWord)a1;
4202 UWord n1w = (UWord)n1;
4203 UWord a2w = (UWord)a2;
4204 UWord n2w = (UWord)n2;
4205 tl_assert(n1w > 0 && n2w > 0);
4206 if (a1w + n1w <= a2w) return -1L;
4207 if (a2w + n2w <= a1w) return 1L;
4208 return 0;
4209}
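/* Worked examples, purely illustrative:
   cmp_nonempty_intervals(100,4, 104,2) == -1, since 100+4 <= 104;
   cmp_nonempty_intervals(104,2, 100,4) == 1, by the mirror test;
   cmp_nonempty_intervals(100,4, 102,8) == 0, since the intervals
   overlap at [102,104). */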
4210
sewardjc5ea9962008-12-07 01:41:46 +00004211static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004212{
sewardjd86e3a22008-12-03 11:39:37 +00004213 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004214 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004215 Word i, j;
philippe40648e22015-04-11 11:42:22 +00004216 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004217 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004218
sewardjffce8152011-06-24 10:09:41 +00004219 tl_assert(thr);
4220 ThrID thrid = thr->thrid;
4221 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4222
4223 WordSetID locksHeldW = thr->hgthread->locksetW;
4224
sewardjc5ea9962008-12-07 01:41:46 +00004225 rcec = get_RCEC( thr );
4226 ctxt__rcinc(rcec);
4227
sewardjffce8152011-06-24 10:09:41 +00004228 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004229 switch (szB) {
4230 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004231 case 1: szLg2B = 0; break;
4232 case 2: szLg2B = 1; break;
4233 case 4: szLg2B = 2; break;
4234 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004235 default: tl_assert(0);
4236 }
4237
sewardjffce8152011-06-24 10:09:41 +00004238 /* Look in the map to see if we already have a record for this
4239 address. */
philippe40648e22015-04-11 11:42:22 +00004240 b = VG_(lookupSWA)( oldrefTree, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004241
sewardjd86e3a22008-12-03 11:39:37 +00004242 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004243
4244 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004245 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004246 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004247 ref = (OldRef*)valW;
philippecabdbb52015-04-20 21:33:16 +00004248
4249 tl_assert (ref->ga == a);
sewardjf98e1c02008-10-25 16:22:41 +00004250
sewardjf98e1c02008-10-25 16:22:41 +00004251 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004252 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004253 continue;
sewardjffce8152011-06-24 10:09:41 +00004254 if (ref->accs[i].szLg2B != szLg2B)
4255 continue;
4256 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004257 continue;
4258 /* else we have a match, so stop looking. */
4259 break;
sewardjf98e1c02008-10-25 16:22:41 +00004260 }
4261
4262 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004263 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004264 if (i > 0) {
4265 Thr_n_RCEC tmp = ref->accs[i-1];
4266 ref->accs[i-1] = ref->accs[i];
4267 ref->accs[i] = tmp;
4268 i--;
4269 }
sewardjc5ea9962008-12-07 01:41:46 +00004270 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004271 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004272 ctxt__rcdec( ref->accs[i].rcec );
4273 tl_assert(ref->accs[i].thrid == thrid);
4274 /* Update the RCEC and the W-held lockset. */
4275 ref->accs[i].rcec = rcec;
4276 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004277 } else {
sewardjffce8152011-06-24 10:09:41 +00004278 /* No entry for this (thread, R/W, size, nWHeld) quad.
4279 Shuffle all of them down one slot, and put the new entry
4280 at the start of the array. */
4281 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004282 /* the last slot is in use. We must dec the rc on the
4283 associated rcec. */
4284 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4285 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004286 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4287 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004288 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004289 } else {
4290 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4291 }
4292 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4293 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004294 ref->accs[0].thrid = thrid;
4295 ref->accs[0].szLg2B = szLg2B;
4296 ref->accs[0].isW = (UInt)(isW & 1);
4297 ref->accs[0].locksHeldW = locksHeldW;
4298 ref->accs[0].rcec = rcec;
4299 /* thrid==0 is used to signify an empty slot, so we can't
4300 add zero thrid (such a ThrID is invalid anyway). */
4301 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004302 }
4303
philippecabdbb52015-04-20 21:33:16 +00004304 OldRef_unchain(ref);
4305 OldRef_newest(ref);
sewardjf98e1c02008-10-25 16:22:41 +00004306
4307 } else {
4308
4309 /* We don't have a record for this address. Create a new one. */
philippecabdbb52015-04-20 21:33:16 +00004310 ref = alloc_or_reuse_OldRef();
4311 ref->ga = a;
sewardjffce8152011-06-24 10:09:41 +00004312 ref->accs[0].thrid = thrid;
4313 ref->accs[0].szLg2B = szLg2B;
4314 ref->accs[0].isW = (UInt)(isW & 1);
4315 ref->accs[0].locksHeldW = locksHeldW;
4316 ref->accs[0].rcec = rcec;
4317
4318 /* thrid==0 is used to signify an empty slot, so we can't
4319 add zero thrid (such a ThrID is invalid anyway). */
4320 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4321
4322 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004323 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004324 ref->accs[j].rcec = NULL;
4325 ref->accs[j].thrid = 0;
4326 ref->accs[j].szLg2B = 0;
4327 ref->accs[j].isW = 0;
4328 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004329 }
sewardjbc307e52008-12-06 22:10:54 +00004330 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
philippecabdbb52015-04-20 21:33:16 +00004331 OldRef_newest (ref);
sewardjf98e1c02008-10-25 16:22:41 +00004332 }
4333}
4334
4335
sewardjffce8152011-06-24 10:09:41 +00004336/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004337Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004338 /*OUT*/Thr** resThr,
4339 /*OUT*/SizeT* resSzB,
4340 /*OUT*/Bool* resIsW,
4341 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004342 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004343{
sewardja781be62008-12-08 00:12:28 +00004344 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004345 OldRef* ref;
philippe40648e22015-04-11 11:42:22 +00004346 UWord valW;
sewardjd86e3a22008-12-03 11:39:37 +00004347 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004348
sewardjffce8152011-06-24 10:09:41 +00004349 ThrID cand_thrid;
4350 RCEC* cand_rcec;
4351 Bool cand_isW;
4352 SizeT cand_szB;
4353 WordSetID cand_locksHeldW;
4354 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004355
4356 Addr toCheck[15];
4357 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004358
4359 tl_assert(thr);
4360 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004361
sewardjffce8152011-06-24 10:09:41 +00004362 ThrID thrid = thr->thrid;
4363
sewardja781be62008-12-08 00:12:28 +00004364 toCheck[nToCheck++] = a;
4365 for (i = -7; i < (Word)szB; i++) {
4366 if (i != 0)
4367 toCheck[nToCheck++] = a + i;
4368 }
4369 tl_assert(nToCheck <= 15);
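   /* Example, purely illustrative: for a == 0x1000 and szB == 4,
      toCheck holds 0x1000 itself, the seven bytes below it
      (0xFF9 .. 0xFFF) and the tail bytes 0x1001 .. 0x1003 -- 11
      addresses in all. The window [a-7, a+szB-1] suffices because
      recorded accesses are at most 8 bytes wide, so any recorded
      access overlapping [a, a+szB) must start within 7 bytes below
      a. */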
4370
4371 /* Now see if we can find a suitable matching event for
4372 any of the addresses in toCheck[0 .. nToCheck-1]. */
4373 for (j = 0; j < nToCheck; j++) {
4374
4375 cand_a = toCheck[j];
4376 // VG_(printf)("test %ld %p\n", j, cand_a);
4377
philippe40648e22015-04-11 11:42:22 +00004378 b = VG_(lookupSWA)( oldrefTree, &valW, cand_a );
sewardja781be62008-12-08 00:12:28 +00004379 if (!b)
4380 continue;
4381
sewardjd86e3a22008-12-03 11:39:37 +00004382 ref = (OldRef*)valW;
sewardjffce8152011-06-24 10:09:41 +00004383 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004384
sewardjffce8152011-06-24 10:09:41 +00004385 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4386 cand_rcec = NULL;
4387 cand_isW = False;
4388 cand_szB = 0;
4389 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004390
sewardjc5ea9962008-12-07 01:41:46 +00004391 for (i = 0; i < N_OLDREF_ACCS; i++) {
4392 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004393 cand_rcec = cand->rcec;
4394 cand_thrid = cand->thrid;
4395 cand_isW = (Bool)cand->isW;
4396 cand_szB = 1 << cand->szLg2B;
4397 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004398
sewardjffce8152011-06-24 10:09:41 +00004399 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004400 /* This slot isn't in use. Ignore it. */
4401 continue;
4402
sewardjffce8152011-06-24 10:09:41 +00004403 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004404 /* This is an access by the same thread, but we're only
4405 interested in accesses from other threads. Ignore. */
4406 continue;
4407
4408 if ((!cand_isW) && (!isW))
4409 /* We don't want to report a read racing against another
4410 read; that's stupid. So in this case move on. */
4411 continue;
4412
sewardja781be62008-12-08 00:12:28 +00004413 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4414 /* No overlap with the access we're asking about. Ignore. */
4415 continue;
4416
sewardjc5ea9962008-12-07 01:41:46 +00004417 /* We have a match. Stop searching. */
4418 break;
4419 }
4420
4421 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4422
sewardja781be62008-12-08 00:12:28 +00004423 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004424 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004425 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004426 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004427 tl_assert(cand_rcec);
4428 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4429 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004430 /* Count how many non-zero frames we have. */
4431 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4432 for (n = 0; n < maxNFrames; n++) {
4433 if (0 == cand_rcec->frames[n]) break;
4434 }
sewardjffce8152011-06-24 10:09:41 +00004435 *resEC = VG_(make_ExeContext_from_StackTrace)
4436 (cand_rcec->frames, n);
4437 *resThr = Thr__from_ThrID(cand_thrid);
4438 *resSzB = cand_szB;
4439 *resIsW = cand_isW;
4440 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004441 return True;
4442 }
sewardjc5ea9962008-12-07 01:41:46 +00004443
sewardja781be62008-12-08 00:12:28 +00004444 /* consider next address in toCheck[] */
4445 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004446
sewardja781be62008-12-08 00:12:28 +00004447 /* really didn't find anything. */
4448 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004449}
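
/* A worked instance of the toCheck[] construction above (hypothetical
   values).  For a 4-byte access at a == 0x1000, the loop enumerates
   i = -7 .. 3, skipping 0, so toCheck[] holds 0x1000 plus
   0x0FF9 .. 0x1003 (11 entries in all).  This covers every start
   address whose recorded access could overlap [a, a+szB): recorded
   accesses are at most 8 bytes long, so anything starting below a-7
   cannot reach a.  The worst case, szB == 8, gives 1 + 14 == 15
   entries, which is why toCheck[] is sized at exactly 15. */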

static void event_map_init ( void )
{
   Word i;

   /* Context (RCEC) pool allocator */
   rcec_pool_allocator = VG_(newPA) (
                             sizeof(RCEC),
                             1000 /* RCECs per pool */,
                             HG_(zalloc),
                             "libhb.event_map_init.1 (RCEC pools)",
                             HG_(free)
                          );

   /* Context table */
   tl_assert(!contextTab);
   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
                             N_RCEC_TAB * sizeof(RCEC*) );
   for (i = 0; i < N_RCEC_TAB; i++)
      contextTab[i] = NULL;

   /* Oldref pool allocator */
   oldref_pool_allocator = VG_(newPA)(
                              sizeof(OldRef),
                              1000 /* OldRefs per pool */,
                              HG_(zalloc),
                              "libhb.event_map_init.3 (OldRef pools)",
                              HG_(free)
                           );

   /* Oldref tree */
   tl_assert(!oldrefTree);
   oldrefTree = VG_(newSWA)(
                   HG_(zalloc),
                   "libhb.event_map_init.4 (oldref tree)",
                   HG_(free)
                );

   oldrefTreeN = 0;
   mru.prev = &lru;
   mru.next = NULL;
   lru.prev = NULL;
   lru.next = &mru;
   for (i = 0; i < N_OLDREF_ACCS; i++) {
      mru.accs[i] = (Thr_n_RCEC) {.rcec       = NULL,
                                  .locksHeldW = 0,
                                  .thrid      = 0,
                                  .szLg2B     = 0,
                                  .isW        = 0};
      lru.accs[i] = mru.accs[i];
   }
}

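/* A minimal usage sketch for the two pool allocators created above
   (the element lifecycle shown is illustrative, not copied from the
   real call sites in this file).  VG_(newPA) returns a PoolAlloc*
   that carves fixed-size elements out of 1000-element pools, so each
   RCEC or OldRef allocation avoids a full malloc/free round trip. */
#if 0
static RCEC* example_alloc_RCEC ( void )
{
   /* grab one fixed-size element from the RCEC pool */
   return (RCEC*) VG_(allocEltPA)( rcec_pool_allocator );
}
static void example_free_RCEC ( RCEC* rcec )
{
   /* hand the element back to its pool for reuse */
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
#endif
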
static void event_map__check_reference_counts ( void )
{
   RCEC*   rcec;
   OldRef* oldref;
   Word    i;
   UWord   nEnts = 0;
   UWord   keyW, valW;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, inc check ref counts */
   VG_(initIterSWA)( oldrefTree );
   while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
      oldref = (OldRef*)valW;
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aThrID != 0) {
            tl_assert(aRef);
            tl_assert(aRef->magic == RCEC_MAGIC);
            aRef->rcX++;
         } else {
            tl_assert(!aRef);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}

__attribute__((noinline))
static void do_RCEC_GC ( void )
{
   UInt i;

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      VG_(message)(Vg_DebugMsg,
                   "libhb: RCEC GC: #%u  %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   ctr++,
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
   }
   tl_assert (stats__ctxt_tab_curr > RCEC_referenced);

   /* Throw away all RCECs with zero reference counts */
   for (i = 0; i < N_RCEC_TAB; i++) {
      RCEC** pp = &contextTab[i];
      RCEC*  p  = *pp;
      while (p) {
         if (p->rc == 0) {
            *pp = p->next;
            free_RCEC(p);
            p = *pp;
            tl_assert(stats__ctxt_tab_curr > 0);
            stats__ctxt_rcdec_discards++;
            stats__ctxt_tab_curr--;
         } else {
            pp = &p->next;
            p = p->next;
         }
      }
   }

   tl_assert (stats__ctxt_tab_curr == RCEC_referenced);
}

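/* The deletion loop in do_RCEC_GC uses the pointer-to-pointer idiom:
   'pp' always addresses the link that points at 'p', so splicing out
   a node is just '*pp = p->next', with no special case for the
   bucket head.  A standalone sketch of the same idiom (hypothetical
   node type, not from this file): */
#if 0
typedef struct ExNode_ { struct ExNode_* next; Bool dead; } ExNode;
static void example_unlink_dead ( ExNode** head )
{
   ExNode** pp = head;
   ExNode*  p  = *pp;
   while (p) {
      if (p->dead) {
         *pp = p->next;  /* splice p out; pp itself is unchanged */
         /* p would be freed here */
         p = *pp;
      } else {
         pp = &p->next;  /* advance the link pointer ... */
         p  = p->next;   /* ... and the node pointer together */
      }
   }
}
#endif
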
/////////////////////////////////////////////////////////
//                                                     //
//                  Core MSM                           //
//                                                     //
/////////////////////////////////////////////////////////

/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   Nov 08, and again after [...],
   June 09. */

static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;

/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,            Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw,  Cr `join` Kw)

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), then Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler, it avoids flooding the user with vast amounts of
   mostly useless information, and because the program is wrong if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we only show none if
   none exist).

   ---

   That requires the auxiliary proof that

      (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance.
*/

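/* A small worked instance of the rules above (hypothetical clock
   values).  Suppose threads T1, T2 and a location L whose constraint
   pair (Cr,Cw) == ([1,0],[1,0]) was left by a write by T1 at scalar
   W-clock 1.  If T2, with Kw == [0,5], now writes L with no
   intervening synchronisation, the check is whether Cw <= Kw, that
   is, [1,0] <= [0,5]; it fails in the T1 component (1 > 0), so a
   race is reported.  Cfailed and Kfailed are then compared to
   identify T1 as the conflicting thread, and Cw[T1] == 1 gives the
   scalar clock of the segment in which T1's earlier access lies. */
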
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
                         "confTym %llu found %d (%lu,%lu)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            if (!confThr->llexit_done)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}

static Bool is_sane_SVal_C ( SVal sv ) {
   Bool leq;
   if (!SVal__isC(sv)) return True;
   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   return leq;
}


/* Compute new state following a read */
static inline SVal msmcread ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcread++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviR  = acc_thr->viR;
      VtsID tviW  = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
      if (LIKELY(leq)) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           rmini, /* Cfailed */
                           tviR,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msmcread_change++;
      }
   }
   return svNew;
}
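
/* Worked instance of the read rule above (hypothetical clocks).
   With svOld == (rmini,wmini) == ([2,0],[2,0]) after a write by T1,
   a read by T2 whose viR == [0,3] fails the check rmini <= tviR
   ([2,0] <= [0,3] is false in the T1 slot), so record_race_info is
   called with Cfailed == rmini and Kfailed == tviR.  In the no-race
   case the new state is (rmini, wmini `join` tviW): the R-constraint
   is left alone and only the W-constraint advances, using tviW
   rather than tviR -- the RWLOCK subtlety noted in the code. */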

/* Compute new state following a write */
static inline SVal msmcwrite ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW <= tviR (invar on thread clocks)
         //    wmini <= tviW (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and    join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}

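/* Worked instance of the write rule above (hypothetical clocks).
   A write by T1 with tviW == [4,0] to a location whose constraint
   pair is (rmini,wmini) == ([2,1],[2,1]) fails the check
   wmini <= tviW in the T2 component (1 > 0), so the access races
   with whichever thread contributed that component.  Had the pair
   been ([2,0],[2,0]), the check would pass and the constraint
   collapses to (tviW,tviW) == ([4,0],[4,0]), matching the proof in
   the code that join(rmini,tviW) == join(wmini,tviW) == tviW. */
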
/////////////////////////////////////////////////////////
//                                                     //
//     Apply core MSM to specific memory locations     //
//                                                     //
/////////////////////////////////////////////////////////

/*------------- ZSM accesses: 8 bit sapply ------------- */

static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

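/* Address-decomposition sketch for the fast path above (values are
   illustrative; the real constants are defined earlier in this
   file).  With 64-byte lines, an access at a == 0x1234567B lives at
   cacheline offset 0x1234567B & 63 == 0x3B, in tree number
   0x3B >> 3 == 7, at tree offset 0x3B & 7 == 3.  The descriptor bit
   TREE_DESCR_8_0 << 3 then says whether svals[0x3B] individually
   holds a valid 8-bit-granularity SVal; if not, pulldown_to_8 splits
   the coarser node covering it so that it does. */
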
static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*------------- ZSM accesses: 16 bit sapply ------------- */

static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}

static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}

/*------------- ZSM accesses: 32 bit sapply ------------- */

static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}

static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

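/* Note on the SVal_INVALID stores above: after zsm_swrite64 the tree
   for this 8-byte range is a single 64-bit node, so only
   svals[cloff + 0] is meaningful and the other seven slots are
   deliberately poisoned with SVal_INVALID.  Illustrative sequence
   (hypothetical): zsm_swrite64(a, sv) followed by zsm_sread08(a + 5)
   finds the (TREE_DESCR_8_0 << 5) descriptor bit clear, so
   pulldown_to_8 splits the 64-bit node back down to 8-bit
   granularity and svals[cloff + 5] becomes individually valid
   again. */
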
/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}

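/* A hedged usage sketch for zsm_scopy_range (hypothetical sizes and
   helper; the real caller is the realloc handling mentioned above).
   Growing a block from 16 to 32 bytes preserves the old shadow state
   and gives the extension a fresh state: */
#if 0
static void example_shadow_realloc ( Addr old_a, Addr new_a, SVal svNew )
{
   zsm_scopy_range( old_a, new_a, 16 );            /* keep old state */
   zsm_sset_range ( new_a + 16, 32 - 16, svNew );  /* state for the tail */
}
#endif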

/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

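/* Worked instance of the alignment peeling above (hypothetical
   range): for a == 0x5003, len == 13 the calls issued are
      zsm_swrite08( 0x5003 )  -- 1 byte, reaching 16-bit alignment
      zsm_swrite32( 0x5004 )  -- 0x5004 is already 32-bit aligned,
                                 so the 16-bit step is skipped
      zsm_swrite64( 0x5008 )  -- the final 8 bytes
   i.e. 1 + 4 + 8 == 13 bytes written with the fewest, widest stores
   the alignment allows. */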

/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr  tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr  before_start  = a;
      Addr  aligned_start = cacheline_ROUNDUP(a);
      Addr  after_start   = cacheline_ROUNDDN(a + len);
      UWord before_len    = aligned_start - before_start;
      UWord aligned_len   = after_start - aligned_start;
      UWord after_len     = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr  tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word  zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in; instead
               modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}

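/* Worked instance of the large-range split above (hypothetical
   addresses, assuming the usual 64-byte N_LINE_ARANGE): for
   a == 0x10030, len == 0x150 we get
      before_start == 0x10030, aligned_start == 0x10040,
      after_start  == 0x10180,
   so before_len == 0x10, aligned_len == 0x140 (five whole lines) and
   after_len == 0.  The two partial pieces go through
   zsm_sset_range_SMALL; each of the five whole lines not already in
   the cache is rewritten directly in its compressed LineZ form as a
   one-entry dictionary { svNew }, with every 2-bit index referring
   to dict[0]. */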

/////////////////////////////////////////////////////////
//                                                     //
//             Front-filtering accesses                //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
# define STATS__F_SHOW \
     do { \
        if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
           VG_(printf)("filters: ac %lu sk %lu\n", \
                       stats__f_ac, stats__f_sk); \
     } while (0)
#else
# define STATS__F_SHOW /* */
#endif

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

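/* Hedged sketch of the intent of the 'f__' (filtered) entry points
   above: each thread's Filter caches byte ranges it has accessed
   recently with no intervening synchronisation, so a repeated access
   can skip the full MSM transition (counted in stats__f_sk); the
   filter is invalidated wholesale when the thread resumes after a
   synchronisation point (see Filter__clear in libhb_Thr_resumes
   below).  Illustrative call pattern (hypothetical handler, not code
   from this file): */
#if 0
static void example_on_memory_write ( Thr* thr, Addr a, SizeT len )
{
   /* First write: filter miss, full zsm_sapply path.  An immediately
      repeated write to the same bytes: filter hit, skipped. */
   zsm_sapplyNN_f__msmcwrite( thr, a, len );
}
#endif
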
void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   if (HG_(clo_history_level) == 1) {
      tl_assert(thr->local_Kws_n_stacks);
      if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
         note_local_Kw_n_stack_for(thr);
   }
}

5816
5817/////////////////////////////////////////////////////////
5818// //
sewardjf98e1c02008-10-25 16:22:41 +00005819// Synchronisation objects //
5820// //
5821/////////////////////////////////////////////////////////
5822
/* A doubly-linked list of all the SOs. */
SO* admin_SO = NULL;

static SO* SO__Alloc ( void )
{
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR   = VtsID_INVALID;
   so->viW   = VtsID_INVALID;
   so->magic = SO_MAGIC;
   /* Add to the doubly-linked list. */
   if (admin_SO) {
      tl_assert(admin_SO->admin_prev == NULL);
      admin_SO->admin_prev = so;
      so->admin_next = admin_SO;
   } else {
      so->admin_next = NULL;
   }
   so->admin_prev = NULL;
   admin_SO = so;
   return so;
}

static void SO__Dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   /* Delete from the doubly-linked list. */
   if (so->admin_prev)
      so->admin_prev->admin_next = so->admin_next;
   if (so->admin_next)
      so->admin_next->admin_prev = so->admin_prev;
   if (so == admin_SO)
      admin_SO = so->admin_next;
   HG_(free)( so );
}
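
/* Illustrative sketch (not used by the library itself): the admin
   list exists so every live SO can be visited, e.g. for debugging or
   leak checking.  admin_SO always points at the most recently
   allocated SO, whose admin_prev is NULL:

      UWord n_live_SOs = 0;
      for (SO* so = admin_SO; so != NULL; so = so->admin_next)
         n_live_SOs++;
*/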


/////////////////////////////////////////////////////////
//                                                     //
//                  Top Level API                      //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( const HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr*  thr;
   VtsID vi;

   // We will have to store a large number of these, so make sure
   // they're the size we expect them to be.
   tl_assert(sizeof(ScalarTS) == 8);

   /* because the first 1024 ThrIDs are unusable */
   tl_assert(SCALARTS_N_THRBITS >= 11);
   /* so as to fit in a UInt w/ 3 bits to spare (see defn of
      Thr_n_RCEC). */
   tl_assert(SCALARTS_N_THRBITS <= 29);

   /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
      (32-bit).  It's not correctness-critical, but there are a lot of
      them, so it's important from a space viewpoint.  Unfortunately
      we simply can't pack it into 2 words on a 32-bit target. */
   if (sizeof(UWord) == 8) {
      tl_assert(sizeof(Thr_n_RCEC) == 16);
   } else {
      tl_assert(sizeof(Thr_n_RCEC) == 12);
   }

   /* Word sets really are 32 bits.  Even on a 64 bit target. */
   tl_assert(sizeof(WordSetID) == 4);
   tl_assert(sizeof(WordSet) == sizeof(WordSetID));

   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC         = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   /* Allocated once and never deallocated.  Used as a temporary in
      VTS singleton, tick and join operations. */
   temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   temp_max_sized_VTS->id = VtsID_INVALID;
   verydead_thread_tables_init();
   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi  = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state("  root", thr);
   return thr;
}
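
/* Typical start-of-day usage, as a sketch.  The real caller is the
   Helgrind tool proper, which supplies its own callbacks; the names
   here are hypothetical:

      static void my_get_stacktrace ( Thr* t, Addr* frames, UWord n )
      { ... capture up to n PCs from t's client stack ... }
      static ExeContext* my_get_EC ( Thr* t )
      { ... return a stack snapshot for t ... }

      Thr* root = libhb_init( my_get_stacktrace, my_get_EC );

   The returned Thr* represents the root (main) thread, whose vector
   clocks start out as the singleton { root:1 }. */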


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
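
/* Worked example (illustrative numbers).  Suppose the parent P's
   clocks are currently { P:5 }.  After libhb_create:

      child  viR = viW = { P:5, C:1 }   -- parent's VC ticked at C
      parent viR = viW = { P:6 }        -- parent ticked at itself

   Hence everything P did up to the creation point happens-before all
   of the child's events, while P's subsequent events (at P:6 and
   later) are unordered with respect to the child. */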

/* Shut down the library, and print stats (in fact that's _all_
   this is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)("   cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)("   cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)("   cache: %'14lu invals, %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)("   cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)("   cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)("   cline:    splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)("   cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)("   cline: sizeof(LineZ) %ld,"
                     " covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)( "   libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
                   stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
      VG_(printf)( "   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
                   stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
      VG_(printf)( "   libhb: VTSset: find__or__clone_and_add %'lu"
                   " (%'lu allocd)\n",
                   stats__vts_set__focaa, stats__vts_set__focaa_a );
      VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
                   stats__vts__indexat_slow );
      show_vts_stats ("libhb stats");

      VG_(printf)("%s","\n");
      VG_(printf)(
         "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( "   libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      {
         UInt live = 0;
         UInt llexit_done = 0;
         UInt joinedwith_done = 0;
         UInt llexit_and_joinedwith_done = 0;

         Thread* hgthread = get_admin_threads();
         tl_assert(hgthread);
         while (hgthread) {
            Thr* hbthr = hgthread->hbthr;
            tl_assert(hbthr);
            if (hbthr->llexit_done && hbthr->joinedwith_done)
               llexit_and_joinedwith_done++;
            else if (hbthr->llexit_done)
               llexit_done++;
            else if (hbthr->joinedwith_done)
               joinedwith_done++;
            else
               live++;
            hgthread = hgthread->admin;
         }
         VG_(printf)("   libhb: threads live: %d exit_and_joinedwith %d"
                     " exit %d joinedwith %d\n",
                     live, llexit_and_joinedwith_done,
                     llexit_done, joinedwith_done);
         VG_(printf)("   libhb: %d verydead_threads, "
                     "%d verydead_threads_not_pruned\n",
                     (int) VG_(sizeXA)( verydead_thread_table),
                     (int) VG_(sizeXA)( verydead_thread_table_not_pruned));
         tl_assert (VG_(sizeXA)( verydead_thread_table)
                    + VG_(sizeXA)( verydead_thread_table_not_pruned)
                    == llexit_and_joinedwith_done);
      }

      VG_(printf)("%s","\n");
      {
         UWord OldRef_accs_n[N_OLDREF_ACCS+1];
         UInt  accs_n;
         UWord OldRef_n;
         UInt  i;

         OldRef_n = 0;
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            OldRef_accs_n[i] = 0;

         for (OldRef* o = mru.prev; o != &lru; o = o->prev) {
            OldRef_n++;
            accs_n = 0;
            for (i = 0; i < N_OLDREF_ACCS; i++) {
               if (o->accs[i].thrid != 0)
                  accs_n++;
            }
            OldRef_accs_n[accs_n]++;
         }

         tl_assert(OldRef_n == oldrefTreeN);
         VG_(printf)( "   libhb: oldrefTreeN %lu ", oldrefTreeN);
         VG_(printf)( "( ");
         for (i = 0; i <= N_OLDREF_ACCS; i++)
            VG_(printf)( "accs[%d]=%lu ", i, OldRef_accs_n[i]);
         VG_(printf)( ")\n");
      }
      VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( "   libhb: contextTab: %lu slots,"
                   " %lu cur ents(ref'd %lu),"
                   " %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr, RCEC_referenced,
                   stats__ctxt_tab_max );
      {
#        define MAXCHAIN 10
         UInt chains[MAXCHAIN+1]; // [MAXCHAIN] gets all chains >= MAXCHAIN
         UInt non0chain = 0;
         UInt n;
         UInt i;
         RCEC *p;

         for (i = 0; i <= MAXCHAIN; i++) chains[i] = 0;
         for (i = 0; i < N_RCEC_TAB; i++) {
            n = 0;
            for (p = contextTab[i]; p; p = p->next)
               n++;
            if (n < MAXCHAIN)
               chains[n]++;
            else
               chains[MAXCHAIN]++;
            if (n > 0)
               non0chain++;
         }
         VG_(printf)( "   libhb: contextTab chain of [length]=nchain."
                      " Avg chain len %3.1f\n"
                      "        ",
                      (Double)stats__ctxt_tab_curr
                      / (Double)(non0chain ? non0chain : 1));
         for (i = 0; i <= MAXCHAIN; i++) {
            if (chains[i] != 0)
               VG_(printf)( "[%d%s]=%d ",
                            i, i == MAXCHAIN ? "+" : "",
                            chains[i]);
         }
         VG_(printf)( "\n");
#        undef MAXCHAIN
      }
      VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr)    = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

/* Receive notification that a thread has low-level exited.  The
   significance here is that we do not expect to see any more memory
   references from it. */
void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   tl_assert(!thr->llexit_done);
   thr->llexit_done = True;

   /* free up Filter and local_Kws_n_stacks (well, actually not the
      latter ..) */
   tl_assert(thr->filter);
   HG_(free)(thr->filter);
   thr->filter = NULL;

   /* Tell the VTS mechanism this thread has exited, so it can
      participate in VTS pruning.  Note this can only happen if the
      thread has both ll_exited and has been joined with. */
   if (thr->joinedwith_done)
      VTS__declare_thread_very_dead(thr);

   /* Another space-accuracy tradeoff.  Do we want to be able to show
      H1 history for conflicts in threads which have since exited?  If
      yes, then we better not free up thr->local_Kws_n_stacks.  The
      downside is a potential per-thread leak of up to
      N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
      XArray average overcommit factor is (1.5 I'd guess). */
   // hence:
   // VG_(deleteXA)(thr->local_Kws_n_stacks);
   // thr->local_Kws_n_stacks = NULL;
}

/* Receive notification that a thread has been joined with.  The
   significance here is that we do not expect to see any further
   references to its vector clocks (Thr::viR and Thr::viW). */
void libhb_joinedwith_done ( Thr* thr )
{
   tl_assert(thr);
   /* Caller must ensure that this is only ever called once per Thr. */
   tl_assert(!thr->joinedwith_done);
   thr->joinedwith_done = True;
   if (thr->llexit_done)
      VTS__declare_thread_very_dead(thr);
}
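
/* Lifecycle sketch (illustrative): for a thread T that exits and is
   later joined, the two notifications may arrive in either order:

      libhb_async_exit(T);        // T makes no further memory refs
      libhb_joinedwith_done(T);   // T's clocks won't be read again
                                  //   -> VTS__declare_thread_very_dead(T)

   Whichever notification arrives second is the one that declares T
   "very dead" and so makes it eligible for VTS pruning. */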


/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   if (!thr->llexit_done) {
      Filter__clear(thr->filter, "libhb_so_send");
      note_local_Kw_n_stack_for(thr);
   }
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      if (thr->filter)
         Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
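
/* Usage sketch (illustrative; the real mapping from client sync
   events to sends/receives lives in the tool, not in this file).
   A mutex can be modelled with one SO per mutex: a strong send at
   unlock time and a strong receive at lock time:

      libhb_so_send( thr_unlocker, so, True );   // at unlock
      libhb_so_recv( thr_locker,   so, True );   // at lock

   so that everything the unlocker did before the unlock
   happens-before everything the locker does after the lock.  Weak
   operations (passing False) join with, rather than replace, the
   SO's clocks; per the comments above, a weak receive corresponds to
   an R-acquisition of a reader-writer lock, and a weak send lets
   several senders accumulate into the same SO. */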

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
{
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
{
   /* This really does put the requested range in NoAccess.  It's
      expensive though. */
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
}

void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal_NOACCESS;
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
}

Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   tl_assert(thr);
   return thr->hgthread;
}

void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   tl_assert(thr);
   thr->hgthread = hgthread;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   /* GC the unreferenced (zero rc) RCECs when
      (1) a significant nr of RCECs has accumulated (so as to avoid
          scanning a contextTab that is mostly NULL ptrs)
      and (2) the nr of RCECs is approaching the max (as we have in
          any case at least that many RCECs in the pool allocator).
          The margin avoids a small but constant creep in the max nr
          of RCECs, which would otherwise occur because libhb_maybe_GC
          is not called at the exact moment the current nr of RCECs
          reaches the max
      and (3) fewer than 75% of the current RCECs are referenced.
      Keeping the nr of RCECs from growing too much keeps memory use
      low and avoids overpopulating the (fixed-size) contextTab
      hash table. */
   if (UNLIKELY(stats__ctxt_tab_curr > N_RCEC_TAB/2
                && stats__ctxt_tab_curr + 1000 >= stats__ctxt_tab_max
                && stats__ctxt_tab_curr * 0.75 > RCEC_referenced))
      do_RCEC_GC();
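
   /* Illustrative numbers (hypothetical; N_RCEC_TAB is a fixed
      compile-time constant): if N_RCEC_TAB were 1,000,000 and the
      max-so-far were 800,000, the GC would fire once the current
      count exceeded 500,000 (condition 1) and came within 1000 of
      800,000 (condition 2), provided fewer than 75% of the current
      RCECs were still referenced (condition 3). */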

   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts();
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION END main library                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/