
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.06e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
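
/* Illustrative sketch (not part of the library): a (ThrID, tym) pair
   round-tripping through the packed ScalarTS representation.  The
   function name is hypothetical; the types and limits are the real
   ones defined above. */
#if 0
static void example_ScalarTS_roundtrip ( void )
{
   ScalarTS ts;
   tl_assert(sizeof(ScalarTS) == 8);  /* the pair packs into 64 bits */
   ts.thrid = 1024;                   /* thrids are issued from 1024 up */
   ts.tym   = 0x123456789AULL;        /* any tick count < 2^46 fits */
   /* Both fields fit their widths, so they read back unchanged. */
   tl_assert(ts.thrid == 1024);
   tl_assert(ts.tym   == 0x123456789AULL);
   /* A thrid wider than SCALARTS_N_THRBITS (i.e. > ThrID_MAX_VALID)
      would be truncated and so would not survive the round trip. */
}
#endif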



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher-numbered bit is set if an R has been
   seen, so the actual layout is:

      15 14             ...  01 00

      R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
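
/* Illustrative sketch (not part of the library): how an address is
   looked up in a Filter, following the layout described above.  The
   function name is hypothetical; the FI_* macros and types are the
   real ones from this file. */
#if 0
static Bool example_filter_has_W ( Filter* fi, Addr a )
{
   UWord  lineno  = FI_GET_LINENO(a);
   UWord  lineoff = a & (FI_LINE_SZB - 1);
   UShort u16;
   if (fi->tags[lineno] != FI_GET_TAG(a))
      return False;                    /* slot holds some other line */
   /* Each UShort covers 8 bytes, 2 bits per byte; the W bit is the
      lower bit of the pair (a 0x5555 mask selects all W bits). */
   u16 = fi->lines[lineno].u16s[lineoff >> 3];
   return 0 != (u16 & (1 << (2 * (lineoff & 7))));
}
#endif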



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};
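
/* Illustrative sketch (not part of the library): locating the entry
   for a given scalar Kw in local_Kws_n_stacks by binary search, which
   the strictly-increasing ordering noted above makes possible.  The
   function name is hypothetical; VG_(sizeXA)/VG_(indexXA) are the
   standard XArray accessors. */
#if 0
static ExeContext* example_find_EC_for_Kw ( Thr* thr, ULong kw )
{
   Word lo = 0;
   Word hi = VG_(sizeXA)( thr->local_Kws_n_stacks ) - 1;
   while (lo <= hi) {
      Word        mid  = lo + (hi - lo) / 2;
      ULong_n_EC* cand = VG_(indexXA)( thr->local_Kws_n_stacks, mid );
      if (cand->ull == kw) return cand->ec;
      if (cand->ull < kw) lo = mid + 1; else hi = mid - 1;
   }
   return NULL; /* kw's entry was discarded or never recorded */
}
#endif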



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};
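
/* Illustrative sketch (not part of this file): the send/recv pattern
   by which an SO mediates a happens-before edge, using the libhb API
   declared in libhb.h.  The surrounding setup is hypothetical; only
   libhb_so_send/libhb_so_recv are real entry points. */
#if 0
static void example_so_handoff ( Thr* sender, Thr* receiver, SO* so )
{
   /* Sender side: join the sender's vector clocks into the SO. */
   libhb_so_send( sender, so, False/*!strong_send*/ );
   /* Receiver side: join the SO's clocks into the receiver, so that
      everything the sender did before the send happens-before
      everything the receiver does after the recv. */
   libhb_so_recv( receiver, so, True/*strong_recv*/ );
}
#endif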



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );

/* A doubly-linked list of all the SOs. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */
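
/* Illustrative sketch (not part of the library): the client-side
   pattern the contract above implies.  The helper names are
   hypothetical; only zsm_flush_cache is real. */
#if 0
static void example_reclaim_unreferenced_SVals ( void )
{
   /* Reference counts observed via rcinc/rcdec may be stale because
      of the library's internal caching ... */
   zsm_flush_cache();
   /* ... but immediately after the flush they are exact, so entries
      whose observed count is zero really are unreferenced and may be
      freed (here by some hypothetical sweep of the caller's own
      reference-count table). */
   example_sweep_zero_rc_entries();
}
#endif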


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
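/* For example: ROUNDUP(13,8) == 16 and ROUNDUP(16,8) == 16;
   ROUNDDN(13,8) == 8 and ROUNDDN(16,8) == 16. */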



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
     a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
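
/* For example, with N_SECMAP_BITS == 13 and N_LINE_BITS == 6, each
   SecMap covers 8192 bytes of guest address space, held as 128 LineZs
   of 64 bytes each. */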

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[1]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)
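
/* With N_WAY_BITS == 16 this direct-mapped cache has 65536 lines, so
   at most 65536 * 64 == 4MB of address space is cached at once. */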

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does, however, require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers)  (shmem__* fns)       ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
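
/* smCache is a tiny move-to-front cache in front of the (slower)
   WordFM lookup below.  gaKey == 1 can never match, since real keys
   are N_SECMAP_ARANGE-aligned, so {1,NULL} denotes an empty slot. */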

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
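
/* Illustrative sketch (not part of the library): decompressing one
   LineZ via the two-bit index array, as cacheline_fetch below does.
   The function name is hypothetical. */
#if 0
static void example_decompress_LineZ ( /*OUT*/SVal* dst, LineZ* lineZ )
{
   Word i;
   /* Only meaningful for a direct Z rep, i.e. not an F-line pointer. */
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   for (i = 0; i < N_LINE_ARANGE; i++) {
      /* Each byte's shadow value is one of at most 4 distinct values,
         selected by its 2-bit dictionary index. */
      dst[i] = lineZ->dict[ read_twobit_array( lineZ->ix2s, i ) ];
   }
}
#endif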

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, this function rcdecs the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];    // large enough
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return 0;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
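
/* Illustrative sketch (not part of the library): what normalise_tree
   computes for the common case of 8 identical leaf values.  The
   function name is hypothetical. */
#if 0
static void example_normalise_uniform_tree ( void )
{
   Word   i;
   SVal   tree[8];
   UShort descr;
   for (i = 0; i < 8; i++)
      tree[i] = SVal_NOACCESS;        /* any valid SVal will do */
   descr = normalise_tree( tree );
   /* All four 16-bit merges, both 32-bit merges and the final 64-bit
      merge fire, so the whole tree collapses to a single 64-bit node
      described by tree[0]; tree[1..7] become SVal_INVALID. */
   tl_assert(descr == TREE_DESCR_64);
   tl_assert(tree[0] == SVal_NOACCESS);
}
#endif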

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
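
/* For example, an address whose low 6 bits are 0x17 (byte 23 of its
   cache line) has get_treeno == 2 and get_tree_offset == 7. */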

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[6] 000000 */
1540
1541 CacheLine* cl;
1542 Addr* tag_old_p;
1543 Addr tag = a & ~(N_LINE_ARANGE - 1);
1544 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1545
1546 tl_assert(tag != cache_shmem.tags0[wix]);
1547
1548 /* Dump the old line into the backing store. */
1549 stats__cache_totmisses++;
1550
1551 cl = &cache_shmem.lyns0[wix];
1552 tag_old_p = &cache_shmem.tags0[wix];
1553
1554 if (is_valid_scache_tag( *tag_old_p )) {
1555 /* EXPENSIVE and REDUNDANT: callee does it */
sewardj8f5374e2008-12-07 11:40:17 +00001556 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001557 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1558 cacheline_wback( wix );
1559 }
1560 /* and reload the new one */
1561 *tag_old_p = tag;
1562 cacheline_fetch( wix );
sewardj8f5374e2008-12-07 11:40:17 +00001563 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00001564 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1565 return cl;
1566}
1567
1568static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1569 stats__cline_64to32pulldown++;
1570 switch (toff) {
1571 case 0: case 4:
1572 tl_assert(descr & TREE_DESCR_64);
1573 tree[4] = tree[0];
1574 descr &= ~TREE_DESCR_64;
1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1576 break;
1577 default:
1578 tl_assert(0);
1579 }
1580 return descr;
1581}
1582
1583static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1584 stats__cline_32to16pulldown++;
1585 switch (toff) {
1586 case 0: case 2:
1587 if (!(descr & TREE_DESCR_32_0)) {
1588 descr = pulldown_to_32(tree, 0, descr);
1589 }
1590 tl_assert(descr & TREE_DESCR_32_0);
1591 tree[2] = tree[0];
1592 descr &= ~TREE_DESCR_32_0;
1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1594 break;
1595 case 4: case 6:
1596 if (!(descr & TREE_DESCR_32_1)) {
1597 descr = pulldown_to_32(tree, 4, descr);
1598 }
1599 tl_assert(descr & TREE_DESCR_32_1);
1600 tree[6] = tree[4];
1601 descr &= ~TREE_DESCR_32_1;
1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1603 break;
1604 default:
1605 tl_assert(0);
1606 }
1607 return descr;
1608}
1609
1610static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1611 stats__cline_16to8pulldown++;
1612 switch (toff) {
1613 case 0: case 1:
1614 if (!(descr & TREE_DESCR_16_0)) {
1615 descr = pulldown_to_16(tree, 0, descr);
1616 }
1617 tl_assert(descr & TREE_DESCR_16_0);
1618 tree[1] = tree[0];
1619 descr &= ~TREE_DESCR_16_0;
1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1621 break;
1622 case 2: case 3:
1623 if (!(descr & TREE_DESCR_16_1)) {
1624 descr = pulldown_to_16(tree, 2, descr);
1625 }
1626 tl_assert(descr & TREE_DESCR_16_1);
1627 tree[3] = tree[2];
1628 descr &= ~TREE_DESCR_16_1;
1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1630 break;
1631 case 4: case 5:
1632 if (!(descr & TREE_DESCR_16_2)) {
1633 descr = pulldown_to_16(tree, 4, descr);
1634 }
1635 tl_assert(descr & TREE_DESCR_16_2);
1636 tree[5] = tree[4];
1637 descr &= ~TREE_DESCR_16_2;
1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1639 break;
1640 case 6: case 7:
1641 if (!(descr & TREE_DESCR_16_3)) {
1642 descr = pulldown_to_16(tree, 6, descr);
1643 }
1644 tl_assert(descr & TREE_DESCR_16_3);
1645 tree[7] = tree[6];
1646 descr &= ~TREE_DESCR_16_3;
1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1648 break;
1649 default:
1650 tl_assert(0);
1651 }
1652 return descr;
1653}
1654
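/* A sketch of how the pulldown functions compose (the function name
   is invented for illustration; change #if 0 to #if 1 to compile).
   Asking for byte 3 of a tree that currently holds one 64-bit value
   splits the 64-bit node into 32-bit, then 16-bit, then 8-bit nodes
   along the path to offset 3, duplicating the value at each split. */
#if 0
static void example__pulldown_chain ( void )
{
   SVal   tree[8];
   UWord  i;
   UShort descr = TREE_DESCR_64;
   for (i = 0; i < 8; i++) tree[i] = SVal_INVALID;
   descr = pulldown_to_8(tree, 3, descr);
   /* pulldown_to_8 recursed via pulldown_to_16 and pulldown_to_32:
      tree[4] = tree[0], then tree[2] = tree[0], then tree[3] = tree[2]. */
   tl_assert(descr == (TREE_DESCR_32_1 | TREE_DESCR_16_0
                       | TREE_DESCR_8_3 | TREE_DESCR_8_2));
}
#endif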
1655
1656static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1657 UShort mask;
1658 switch (toff) {
1659 case 0:
1660 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1661 tl_assert( (descr & mask) == mask );
1662 descr &= ~mask;
1663 descr |= TREE_DESCR_16_0;
1664 break;
1665 case 2:
1666 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1667 tl_assert( (descr & mask) == mask );
1668 descr &= ~mask;
1669 descr |= TREE_DESCR_16_1;
1670 break;
1671 case 4:
1672 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1673 tl_assert( (descr & mask) == mask );
1674 descr &= ~mask;
1675 descr |= TREE_DESCR_16_2;
1676 break;
1677 case 6:
1678 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1679 tl_assert( (descr & mask) == mask );
1680 descr &= ~mask;
1681 descr |= TREE_DESCR_16_3;
1682 break;
1683 default:
1684 tl_assert(0);
1685 }
1686 return descr;
1687}
1688
1689static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1690 UShort mask;
1691 switch (toff) {
1692 case 0:
1693 if (!(descr & TREE_DESCR_16_0))
1694 descr = pullup_descr_to_16(descr, 0);
1695 if (!(descr & TREE_DESCR_16_1))
1696 descr = pullup_descr_to_16(descr, 2);
1697 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1698 tl_assert( (descr & mask) == mask );
1699 descr &= ~mask;
1700 descr |= TREE_DESCR_32_0;
1701 break;
1702 case 4:
1703 if (!(descr & TREE_DESCR_16_2))
1704 descr = pullup_descr_to_16(descr, 4);
1705 if (!(descr & TREE_DESCR_16_3))
1706 descr = pullup_descr_to_16(descr, 6);
1707 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1708 tl_assert( (descr & mask) == mask );
1709 descr &= ~mask;
1710 descr |= TREE_DESCR_32_1;
1711 break;
1712 default:
1713 tl_assert(0);
1714 }
1715 return descr;
1716}
1717
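/* Example: starting from descr == (TREE_DESCR_8_1 | TREE_DESCR_8_0
   | TREE_DESCR_16_1), pullup_descr_to_32(descr, 0) first folds the
   two 8-bit leaves into TREE_DESCR_16_0, then folds the two 16-bit
   nodes into TREE_DESCR_32_0, and returns exactly TREE_DESCR_32_0.
   It is the inverse walk of the pulldown chain shown earlier. */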
1718static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1719 switch (toff) {
1720 case 0: case 4:
1721 return 0 != (descr & TREE_DESCR_64);
1722 default:
1723 tl_assert(0);
1724 }
1725}
1726
1727static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1728 switch (toff) {
1729 case 0:
1730 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1731 case 2:
1732 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1733 case 4:
1734 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1735 case 6:
1736 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1737 default:
1738 tl_assert(0);
1739 }
1740}
1741
1742/* ------------ Cache management ------------ */
1743
1744static void zsm_flush_cache ( void )
1745{
1746 shmem__flush_and_invalidate_scache();
1747}
1748
1749
1750static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1751{
1752 tl_assert( sizeof(UWord) == sizeof(Addr) );
1753
1754 rcinc = p_rcinc;
1755 rcdec = p_rcdec;
1756
1757 tl_assert(map_shmem == NULL);
1758 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1759 HG_(free),
1760 NULL/*unboxed UWord cmp*/);
sewardjf98e1c02008-10-25 16:22:41 +00001761 shmem__invalidate_scache();
1762
1763 /* a SecMap must contain an integral number of CacheLines */
1764 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1765 /* also ... a CacheLine holds an integral number of trees */
1766 tl_assert(0 == (N_LINE_ARANGE % 8));
1767}
1768
1769/////////////////////////////////////////////////////////////////
1770/////////////////////////////////////////////////////////////////
1771// //
1772// SECTION END compressed shadow memory //
1773// //
1774/////////////////////////////////////////////////////////////////
1775/////////////////////////////////////////////////////////////////
1776
1777
1778
1779/////////////////////////////////////////////////////////////////
1780/////////////////////////////////////////////////////////////////
1781// //
1782// SECTION BEGIN vts primitives //
1783// //
1784/////////////////////////////////////////////////////////////////
1785/////////////////////////////////////////////////////////////////
1786
sewardjf98e1c02008-10-25 16:22:41 +00001787
sewardje4cce742011-02-24 15:25:24 +00001788/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1789 being compact stand-ins for Thr*'s. Use these functions to map
1790 between them. */
1791static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1792static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1793
sewardje4cce742011-02-24 15:25:24 +00001794__attribute__((noreturn))
1795static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1796{
1797 if (due_to_nThrs) {
florian6bf37262012-10-21 03:23:36 +00001798 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001799 "\n"
1800 "Helgrind: cannot continue, run aborted: too many threads.\n"
1801 "Sorry. Helgrind can only handle programs that create\n"
1802 "%'llu or fewer threads over their entire lifetime.\n"
1803 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001804 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001805 } else {
florian6bf37262012-10-21 03:23:36 +00001806 const HChar* s =
sewardje4cce742011-02-24 15:25:24 +00001807 "\n"
1808 "Helgrind: cannot continue, run aborted: too many\n"
1809 "synchronisation events. Sorry. Helgrind can only handle\n"
1810 "programs which perform %'llu or fewer\n"
1811 "inter-thread synchronisation events (locks, unlocks, etc).\n"
1812 "\n";
1813 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1814 }
1815 VG_(exit)(1);
1816 /*NOTREACHED*/
1817 tl_assert(0); /*wtf?!*/
1818}
1819
1820
sewardjffce8152011-06-24 10:09:41 +00001821/* The dead thread (ThrID, actually) table. A thread may only be
1822 listed here if we have been notified thereof by libhb_async_exit.
1823 New entries are added at the end. The order isn't important, but
1824 the ThrID values must be unique. This table lists the identity of
1825 all threads that have ever died -- none are ever removed. We keep
1826 this table so as to be able to prune entries from VTSs. We don't
1827 actually need to keep the set of threads that have ever died --
1828 only the threads that have died since the previous round of
1829 pruning. But it's useful for sanity check purposes to keep the
1830 entire set, so we do. */
1831static XArray* /* of ThrID */ verydead_thread_table = NULL;
1832
1833/* Arbitrary total ordering on ThrIDs. */
florian6bd9dc12012-11-23 16:17:43 +00001834static Int cmp__ThrID ( const void* v1, const void* v2 ) {
1835 ThrID id1 = *(const ThrID*)v1;
1836 ThrID id2 = *(const ThrID*)v2;
sewardjffce8152011-06-24 10:09:41 +00001837 if (id1 < id2) return -1;
1838 if (id1 > id2) return 1;
1839 return 0;
1840}
1841
1842static void verydead_thread_table_init ( void )
1843{
1844 tl_assert(!verydead_thread_table);
1845 verydead_thread_table
1846 = VG_(newXA)( HG_(zalloc),
1847 "libhb.verydead_thread_table_init.1",
1848 HG_(free), sizeof(ThrID) );
sewardjffce8152011-06-24 10:09:41 +00001849 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
1850}
1851
sewardjf98e1c02008-10-25 16:22:41 +00001852
1853/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1854 a backlink for the caller's convenience. Since we have no idea
1855 what to set that to in the library, it always gets set to
1856 VtsID_INVALID. */
1857typedef
1858 struct {
sewardj7aa38a92011-02-27 23:04:12 +00001859 VtsID id;
1860 UInt usedTS;
1861 UInt sizeTS;
1862 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00001863 }
1864 VTS;
1865
sewardj7aa38a92011-02-27 23:04:12 +00001866/* Allocate a VTS capable of storing 'sizeTS' entries. */
florian6bd9dc12012-11-23 16:17:43 +00001867static VTS* VTS__new ( const HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00001868
sewardjffce8152011-06-24 10:09:41 +00001869/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00001870 number of ScalarTSs present. */
florian6bd9dc12012-11-23 16:17:43 +00001871static VTS* VTS__clone ( const HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001872
sewardjffce8152011-06-24 10:09:41 +00001873/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
1874 array is sized exactly to hold the number of required elements.
1875 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
1876 must be in strictly increasing order. */
florian6bd9dc12012-11-23 16:17:43 +00001877static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
sewardjffce8152011-06-24 10:09:41 +00001878
sewardjf98e1c02008-10-25 16:22:41 +00001879/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00001880static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001881
sewardj7aa38a92011-02-27 23:04:12 +00001882/* Create a new singleton VTS in 'out'. Caller must have
1883 pre-allocated 'out' sufficiently big to hold the result in all
1884 possible cases. */
1885static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00001886
sewardj7aa38a92011-02-27 23:04:12 +00001887/* Create in 'out' a VTS which is the same as 'vts' except with
1888 vts[me]++, so to speak. Caller must have pre-allocated 'out'
1889 sufficiently big to hold the result in all possible cases. */
1890static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001891
sewardj7aa38a92011-02-27 23:04:12 +00001892/* Create in 'out' a VTS which is the join (max) of 'a' and
1893 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
1894 the result in all possible cases. */
1895static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001896
sewardj23f12002009-07-24 08:45:08 +00001897/* Compute the partial ordering relation of the two args. Although we
1898 could be completely general and return an enumeration value (EQ,
1899 LT, GT, UN), in fact we only need LEQ, and so we may as well
1900 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00001901
sewardje4cce742011-02-24 15:25:24 +00001902 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
 1903   invalid ThrID).  In the latter case, the returned ThrID indicates
 1904   the discovered point at which the relation fails.  There may be more
1905 than one such point, but we only care about seeing one of them, not
1906 all of them. This rather strange convention is used because
1907 sometimes we want to know the actual index at which they first
1908 differ. */
1909static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001910
1911/* Compute an arbitrary structural (total) ordering on the two args,
1912 based on their VCs, so they can be looked up in a table, tree, etc.
1913 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00001914static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001915
florianb28fe892014-10-28 20:52:07 +00001916/* Debugging only. Display the given VTS. */
1917static void VTS__show ( const VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001918
1919/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00001920static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00001921
sewardjffce8152011-06-24 10:09:41 +00001922/* Notify the VTS machinery that a thread has been declared
1923 comprehensively dead: that is, it has done an async exit AND it has
1924 been joined with. This should ensure that its local clocks (.viR
1925 and .viW) will never again change, and so all mentions of this
1926 thread from all VTSs in the system may be removed. */
1927static void VTS__declare_thread_very_dead ( Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00001928
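/* Illustrative use of the VTS primitives declared above (a sketch
   only: 'thrA' and 'thrB' stand for valid Thr*s, the function name is
   invented, and real callers go via the VtsID layer further below
   rather than handling raw VTS*s).  Change #if 0 to #if 1 to compile. */
#if 0
static void example__vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS* a = VTS__new("example.a", 1);
   VTS* b = VTS__new("example.b", 1);
   VTS* j = VTS__new("example.j", 2); /* join needs a->usedTS + b->usedTS */
   VTS__singleton(a, thrA, 1);        /* a = [ thrA:1 ] */
   VTS__singleton(b, thrB, 5);        /* b = [ thrB:5 ] */
   VTS__join(j, a, b);                /* j = [ thrA:1 thrB:5 ], ThrID order */
   tl_assert(VTS__cmpLEQ(a, j) == 0); /* a <= j in the partial order */
   VTS__delete(a); VTS__delete(b); VTS__delete(j);
}
#endif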
1929/*--------------- to do with Vector Timestamps ---------------*/
1930
sewardjf98e1c02008-10-25 16:22:41 +00001931static Bool is_sane_VTS ( VTS* vts )
1932{
1933 UWord i, n;
1934 ScalarTS *st1, *st2;
1935 if (!vts) return False;
sewardj555fc572011-02-27 23:39:53 +00001936 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00001937 n = vts->usedTS;
1938 if (n == 1) {
1939 st1 = &vts->ts[0];
1940 if (st1->tym == 0)
1941 return False;
1942 }
1943 else
sewardjf98e1c02008-10-25 16:22:41 +00001944 if (n >= 2) {
1945 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00001946 st1 = &vts->ts[i];
1947 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00001948 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00001949 return False;
1950 if (st1->tym == 0 || st2->tym == 0)
1951 return False;
1952 }
1953 }
1954 return True;
1955}
1956
1957
sewardj7aa38a92011-02-27 23:04:12 +00001958/* Create a new, empty VTS.
sewardjf98e1c02008-10-25 16:22:41 +00001959*/
florian6bd9dc12012-11-23 16:17:43 +00001960static VTS* VTS__new ( const HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00001961{
sewardj7aa38a92011-02-27 23:04:12 +00001962 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
1963 tl_assert(vts->usedTS == 0);
1964 vts->sizeTS = sizeTS;
1965 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00001966 return vts;
1967}
1968
sewardj7aa38a92011-02-27 23:04:12 +00001969/* Clone this VTS.
1970*/
florian6bd9dc12012-11-23 16:17:43 +00001971static VTS* VTS__clone ( const HChar* who, VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00001972{
1973 tl_assert(vts);
1974 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1975 UInt nTS = vts->usedTS;
1976 VTS* clone = VTS__new(who, nTS);
1977 clone->id = vts->id;
1978 clone->sizeTS = nTS;
1979 clone->usedTS = nTS;
1980 UInt i;
1981 for (i = 0; i < nTS; i++) {
1982 clone->ts[i] = vts->ts[i];
1983 }
1984 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1985 return clone;
1986}
1987
sewardjf98e1c02008-10-25 16:22:41 +00001988
sewardjffce8152011-06-24 10:09:41 +00001989/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
1990 must be in strictly increasing order. We could obviously do this
1991 much more efficiently (in linear time) if necessary.
1992*/
florian6bd9dc12012-11-23 16:17:43 +00001993static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
sewardjffce8152011-06-24 10:09:41 +00001994{
1995 UInt i, j;
1996 tl_assert(vts);
1997 tl_assert(thridsToDel);
1998 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1999 UInt nTS = vts->usedTS;
2000 /* Figure out how many ScalarTSs will remain in the output. */
2001 UInt nReq = nTS;
2002 for (i = 0; i < nTS; i++) {
2003 ThrID thrid = vts->ts[i].thrid;
2004 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2005 nReq--;
2006 }
2007 tl_assert(nReq <= nTS);
2008 /* Copy the ones that will remain. */
2009 VTS* res = VTS__new(who, nReq);
2010 j = 0;
2011 for (i = 0; i < nTS; i++) {
2012 ThrID thrid = vts->ts[i].thrid;
2013 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2014 continue;
2015 res->ts[j++] = vts->ts[i];
2016 }
2017 tl_assert(j == nReq);
2018 tl_assert(j == res->sizeTS);
2019 res->usedTS = j;
2020 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2021 return res;
2022}
2023
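/* A linear-time variant of VTS__subtract (a sketch, not used by the
   library; the name is invented).  Both vts->ts[] and 'thridsToDel'
   are sorted by ThrID, so a two-pointer sweep can replace the
   per-element VG_(lookupXA) binary searches used above. */
#if 0
static VTS* example__subtract_linear ( const HChar* who, VTS* vts,
                                       XArray* thridsToDel )
{
   UInt i, j = 0, nReq = 0;
   Word k = 0, nDel = VG_(sizeXA)(thridsToDel);
   UInt nTS = vts->usedTS;
   /* First pass: count survivors. */
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      while (k < nDel && *(ThrID*)VG_(indexXA)(thridsToDel, k) < thrid)
         k++;
      if (!(k < nDel && *(ThrID*)VG_(indexXA)(thridsToDel, k) == thrid))
         nReq++;
   }
   /* Second pass: copy survivors. */
   VTS* res = VTS__new(who, nReq);
   k = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      while (k < nDel && *(ThrID*)VG_(indexXA)(thridsToDel, k) < thrid)
         k++;
      if (k < nDel && *(ThrID*)VG_(indexXA)(thridsToDel, k) == thrid)
         continue; /* deleted; skip */
      res->ts[j++] = vts->ts[i];
   }
   res->usedTS = j;
   return res;
}
#endif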
2024
sewardjf98e1c02008-10-25 16:22:41 +00002025/* Delete this VTS in its entirety.
2026*/
sewardj7aa38a92011-02-27 23:04:12 +00002027static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002028{
2029 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002030 tl_assert(vts->usedTS <= vts->sizeTS);
2031 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002032 HG_(free)(vts);
2033}
2034
2035
 2036/* Create a new singleton VTS in 'out'.
2037*/
sewardj7aa38a92011-02-27 23:04:12 +00002038static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2039{
sewardjf98e1c02008-10-25 16:22:41 +00002040 tl_assert(thr);
2041 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002042 tl_assert(out);
2043 tl_assert(out->usedTS == 0);
2044 tl_assert(out->sizeTS >= 1);
2045 UInt hi = out->usedTS++;
2046 out->ts[hi].thrid = Thr__to_ThrID(thr);
2047 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002048}
2049
2050
 2051/* Create in 'out' a VTS which is 'vts' with vts[me]++, so to speak.
 2052   'vts' itself is not modified.
2053*/
sewardj7aa38a92011-02-27 23:04:12 +00002054static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002055{
sewardj7aa38a92011-02-27 23:04:12 +00002056 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002057 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002058 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002059
2060 stats__vts__tick++;
2061
sewardj7aa38a92011-02-27 23:04:12 +00002062 tl_assert(out);
2063 tl_assert(out->usedTS == 0);
2064 if (vts->usedTS >= ThrID_MAX_VALID)
2065 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2066 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2067
sewardjf98e1c02008-10-25 16:22:41 +00002068 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002069 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002070 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002071 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002072
sewardj555fc572011-02-27 23:39:53 +00002073 /* Copy all entries which precede 'me'. */
2074 for (i = 0; i < n; i++) {
2075 ScalarTS* here = &vts->ts[i];
2076 if (UNLIKELY(here->thrid >= me_thrid))
2077 break;
2078 UInt hi = out->usedTS++;
2079 out->ts[hi] = *here;
2080 }
2081
2082 /* 'i' now indicates the next entry to copy, if any.
2083 There are 3 possibilities:
2084 (a) there is no next entry (we used them all up already):
2085 add (me_thrid,1) to the output, and quit
2086 (b) there is a next entry, and its thrid > me_thrid:
2087 add (me_thrid,1) to the output, then copy the remaining entries
2088 (c) there is a next entry, and its thrid == me_thrid:
2089 copy it to the output but increment its timestamp value.
2090 Then copy the remaining entries. (c) is the common case.
2091 */
2092 tl_assert(i >= 0 && i <= n);
2093 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002094 UInt hi = out->usedTS++;
2095 out->ts[hi].thrid = me_thrid;
2096 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002097 } else {
2098 /* cases (b) and (c) */
2099 ScalarTS* here = &vts->ts[i];
2100 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002101 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002102 /* We're hosed. We have to stop. */
2103 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2104 }
sewardj7aa38a92011-02-27 23:04:12 +00002105 UInt hi = out->usedTS++;
2106 out->ts[hi].thrid = here->thrid;
2107 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002108 i++;
sewardj555fc572011-02-27 23:39:53 +00002109 found = True;
2110 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002111 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002112 out->ts[hi].thrid = me_thrid;
2113 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002114 }
sewardj555fc572011-02-27 23:39:53 +00002115 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002116 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002117 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002118 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002119 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002120 }
2121 }
sewardj555fc572011-02-27 23:39:53 +00002122
sewardj7aa38a92011-02-27 23:04:12 +00002123 tl_assert(is_sane_VTS(out));
2124 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2125 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002126}
2127
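/* Example: if vts == [ t1:3 t5:7 ] then VTS__tick(out, t5, vts)
   yields out == [ t1:3 t5:8 ] (case (c) above), whereas
   VTS__tick(out, t3, vts) yields out == [ t1:3 t3:1 t5:7 ]
   (case (b): t3 acquires its first tick, its implicit zero plus one). */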
2128
 2129/* Create in 'out' a VTS constructed as the join (max) of the 2 args.
2130 Neither arg is modified.
2131*/
sewardj7aa38a92011-02-27 23:04:12 +00002132static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002133{
sewardj7aa38a92011-02-27 23:04:12 +00002134 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002135 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002136 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002137 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002138
sewardjc8028ad2010-05-05 09:34:42 +00002139 stats__vts__join++;
2140
sewardj7aa38a92011-02-27 23:04:12 +00002141 tl_assert(a);
2142 tl_assert(b);
2143 useda = a->usedTS;
2144 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002145
sewardj7aa38a92011-02-27 23:04:12 +00002146 tl_assert(out);
2147 tl_assert(out->usedTS == 0);
2148 /* overly conservative test, but doing better involves comparing
2149 the two VTSs, which we don't want to do at this point. */
2150 if (useda + usedb >= ThrID_MAX_VALID)
2151 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2152 tl_assert(out->sizeTS >= useda + usedb);
2153
sewardjf98e1c02008-10-25 16:22:41 +00002154 ia = ib = 0;
2155
2156 while (1) {
2157
sewardje4cce742011-02-24 15:25:24 +00002158 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2159 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002160 occurring in either a or b, and tyma/b are the relevant
2161 scalar timestamps, taking into account implicit zeroes. */
2162 tl_assert(ia >= 0 && ia <= useda);
2163 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002164
njn4c245e52009-03-15 23:25:38 +00002165 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002166 /* both empty - done */
2167 break;
njn4c245e52009-03-15 23:25:38 +00002168
2169 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002170 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002171 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002172 thrid = tmpb->thrid;
2173 tyma = 0;
2174 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002175 ib++;
njn4c245e52009-03-15 23:25:38 +00002176
2177 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002178 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002179 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002180 thrid = tmpa->thrid;
2181 tyma = tmpa->tym;
2182 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002183 ia++;
njn4c245e52009-03-15 23:25:38 +00002184
2185 } else {
sewardje4cce742011-02-24 15:25:24 +00002186 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002187 ScalarTS* tmpa = &a->ts[ia];
2188 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002189 if (tmpa->thrid < tmpb->thrid) {
2190 /* a has the lowest unconsidered ThrID */
2191 thrid = tmpa->thrid;
2192 tyma = tmpa->tym;
2193 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002194 ia++;
sewardje4cce742011-02-24 15:25:24 +00002195 } else if (tmpa->thrid > tmpb->thrid) {
2196 /* b has the lowest unconsidered ThrID */
2197 thrid = tmpb->thrid;
2198 tyma = 0;
2199 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002200 ib++;
2201 } else {
sewardje4cce742011-02-24 15:25:24 +00002202 /* they both next mention the same ThrID */
2203 tl_assert(tmpa->thrid == tmpb->thrid);
2204 thrid = tmpa->thrid; /* == tmpb->thrid */
2205 tyma = tmpa->tym;
2206 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002207 ia++;
2208 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002209 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002210 }
2211 }
2212
2213 /* having laboriously determined (thr, tyma, tymb), do something
2214 useful with it. */
2215 tymMax = tyma > tymb ? tyma : tymb;
2216 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002217 UInt hi = out->usedTS++;
2218 out->ts[hi].thrid = thrid;
2219 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002220 }
2221
2222 }
2223
sewardj7aa38a92011-02-27 23:04:12 +00002224 tl_assert(is_sane_VTS(out));
2225 tl_assert(out->usedTS <= out->sizeTS);
2226 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002227}
2228
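/* Example: joining a == [ t1:3 t5:7 ] with b == [ t1:4 t2:1 ] gives
   [ t1:4 t2:1 t5:7 ]: the common ThrID t1 contributes max(3,4), while
   t2 and t5 each appear in only one argument and so are copied
   through (their implicit timestamp in the other argument is 0). */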
2229
sewardje4cce742011-02-24 15:25:24 +00002230/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
 2231   the relation holds, or the first ThrID at which it fails (no valid ThrID
2232 has the value zero). This rather strange convention is used
2233 because sometimes we want to know the actual index at which they
2234 first differ. */
2235static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002236{
sewardj23f12002009-07-24 08:45:08 +00002237 Word ia, ib, useda, usedb;
2238 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002239
sewardjc8028ad2010-05-05 09:34:42 +00002240 stats__vts__cmpLEQ++;
2241
sewardj7aa38a92011-02-27 23:04:12 +00002242 tl_assert(a);
2243 tl_assert(b);
2244 useda = a->usedTS;
2245 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002246
2247 ia = ib = 0;
2248
2249 while (1) {
2250
njn4c245e52009-03-15 23:25:38 +00002251 /* This logic is to enumerate doubles (tyma, tymb) drawn
2252 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002253 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002254 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002255
sewardjf98e1c02008-10-25 16:22:41 +00002256 tl_assert(ia >= 0 && ia <= useda);
2257 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002258
njn4c245e52009-03-15 23:25:38 +00002259 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002260 /* both empty - done */
2261 break;
njn4c245e52009-03-15 23:25:38 +00002262
2263 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002264 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002265 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002266 tyma = 0;
2267 tymb = tmpb->tym;
2268 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002269 ib++;
njn4c245e52009-03-15 23:25:38 +00002270
2271 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002272 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002273 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002274 tyma = tmpa->tym;
2275 thrid = tmpa->thrid;
2276 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002277 ia++;
njn4c245e52009-03-15 23:25:38 +00002278
2279 } else {
sewardje4cce742011-02-24 15:25:24 +00002280 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002281 ScalarTS* tmpa = &a->ts[ia];
2282 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002283 if (tmpa->thrid < tmpb->thrid) {
2284 /* a has the lowest unconsidered ThrID */
2285 tyma = tmpa->tym;
2286 thrid = tmpa->thrid;
2287 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002288 ia++;
2289 }
2290 else
sewardje4cce742011-02-24 15:25:24 +00002291 if (tmpa->thrid > tmpb->thrid) {
2292 /* b has the lowest unconsidered ThrID */
2293 tyma = 0;
2294 tymb = tmpb->tym;
2295 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002296 ib++;
2297 } else {
sewardje4cce742011-02-24 15:25:24 +00002298 /* they both next mention the same ThrID */
2299 tl_assert(tmpa->thrid == tmpb->thrid);
2300 tyma = tmpa->tym;
2301 thrid = tmpa->thrid;
2302 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002303 ia++;
2304 ib++;
2305 }
2306 }
2307
njn4c245e52009-03-15 23:25:38 +00002308 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002309 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002310 if (tyma > tymb) {
2311 /* not LEQ at this index. Quit, since the answer is
2312 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002313 tl_assert(thrid >= 1024);
2314 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002315 }
sewardjf98e1c02008-10-25 16:22:41 +00002316 }
2317
sewardje4cce742011-02-24 15:25:24 +00002318 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002319}
2320
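/* Example: with a == [ t1:3 ] and b == [ t1:4 t2:1 ],
   VTS__cmpLEQ(a, b) == 0, since 3 <= 4 and (implicitly) 0 <= 1; but
   VTS__cmpLEQ(b, a) returns t1's ThrID, the first index at which b's
   timestamp (4) exceeds a's (3). */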
2321
2322/* Compute an arbitrary structural (total) ordering on the two args,
2323 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002324 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2325 performance critical so there is some effort expended to make it sa
2326 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002327*/
2328Word VTS__cmp_structural ( VTS* a, VTS* b )
2329{
2330 /* We just need to generate an arbitrary total ordering based on
2331 a->ts and b->ts. Preferably do it in a way which comes across likely
2332 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002333 Word i;
2334 Word useda = 0, usedb = 0;
2335 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002336
sewardjc8028ad2010-05-05 09:34:42 +00002337 stats__vts__cmp_structural++;
2338
2339 tl_assert(a);
2340 tl_assert(b);
2341
sewardj7aa38a92011-02-27 23:04:12 +00002342 ctsa = &a->ts[0]; useda = a->usedTS;
2343 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002344
2345 if (LIKELY(useda == usedb)) {
2346 ScalarTS *tmpa = NULL, *tmpb = NULL;
2347 stats__vts__cmp_structural_slow++;
2348 /* Same length vectors. Find the first difference, if any, as
2349 fast as possible. */
2350 for (i = 0; i < useda; i++) {
2351 tmpa = &ctsa[i];
2352 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002353 if (LIKELY(tmpa->tym == tmpb->tym
2354 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002355 continue;
2356 else
2357 break;
2358 }
2359 if (UNLIKELY(i == useda)) {
2360 /* They're identical. */
2361 return 0;
2362 } else {
2363 tl_assert(i >= 0 && i < useda);
2364 if (tmpa->tym < tmpb->tym) return -1;
2365 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002366 if (tmpa->thrid < tmpb->thrid) return -1;
2367 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002368 /* we just established them as non-identical, hence: */
2369 }
2370 /*NOTREACHED*/
2371 tl_assert(0);
2372 }
sewardjf98e1c02008-10-25 16:22:41 +00002373
2374 if (useda < usedb) return -1;
2375 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002376 /*NOTREACHED*/
2377 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002378}
2379
2380
florianb28fe892014-10-28 20:52:07 +00002381/* Debugging only. Display the given VTS.
sewardjf98e1c02008-10-25 16:22:41 +00002382*/
florianb28fe892014-10-28 20:52:07 +00002383static void VTS__show ( const VTS* vts )
sewardj7aa38a92011-02-27 23:04:12 +00002384{
sewardjf98e1c02008-10-25 16:22:41 +00002385 Word i, n;
sewardjf98e1c02008-10-25 16:22:41 +00002386 tl_assert(vts && vts->ts);
florianb28fe892014-10-28 20:52:07 +00002387
2388 VG_(printf)("[");
sewardj7aa38a92011-02-27 23:04:12 +00002389 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002390 for (i = 0; i < n; i++) {
florianb28fe892014-10-28 20:52:07 +00002391 const ScalarTS *st = &vts->ts[i];
2392 VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002393 }
florianb28fe892014-10-28 20:52:07 +00002394 VG_(printf)("]");
sewardjf98e1c02008-10-25 16:22:41 +00002395}
2396
2397
2398/* Debugging only. Return vts[index], so to speak.
2399*/
sewardj7aa38a92011-02-27 23:04:12 +00002400ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2401{
sewardjf98e1c02008-10-25 16:22:41 +00002402 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002403 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002404 stats__vts__indexat_slow++;
sewardjf98e1c02008-10-25 16:22:41 +00002405 tl_assert(vts && vts->ts);
sewardj7aa38a92011-02-27 23:04:12 +00002406 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002407 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002408 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002409 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002410 return st->tym;
2411 }
2412 return 0;
2413}
2414
2415
sewardjffce8152011-06-24 10:09:41 +00002416/* See comment on prototype above.
2417*/
2418static void VTS__declare_thread_very_dead ( Thr* thr )
2419{
2420 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2421
2422 tl_assert(thr->llexit_done);
2423 tl_assert(thr->joinedwith_done);
2424
2425 ThrID nyu;
2426 nyu = Thr__to_ThrID(thr);
2427 VG_(addToXA)( verydead_thread_table, &nyu );
2428
2429 /* We can only get here if we're assured that we'll never again
2430 need to look at this thread's ::viR or ::viW. Set them to
2431 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2432 mostly so that we don't wind up pruning them (as that would be
2433 nonsensical: the only interesting ScalarTS entry for a dead
 2434   thread is its own index, and the pruning will remove that). */
2435 VtsID__rcdec(thr->viR);
2436 VtsID__rcdec(thr->viW);
2437 thr->viR = VtsID_INVALID;
2438 thr->viW = VtsID_INVALID;
2439}
2440
2441
sewardjf98e1c02008-10-25 16:22:41 +00002442/////////////////////////////////////////////////////////////////
2443/////////////////////////////////////////////////////////////////
2444// //
2445// SECTION END vts primitives //
2446// //
2447/////////////////////////////////////////////////////////////////
2448/////////////////////////////////////////////////////////////////
2449
2450
2451
2452/////////////////////////////////////////////////////////////////
2453/////////////////////////////////////////////////////////////////
2454// //
2455// SECTION BEGIN main library //
2456// //
2457/////////////////////////////////////////////////////////////////
2458/////////////////////////////////////////////////////////////////
2459
2460
2461/////////////////////////////////////////////////////////
2462// //
2463// VTS set //
2464// //
2465/////////////////////////////////////////////////////////
2466
sewardjffce8152011-06-24 10:09:41 +00002467static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002468
2469static void vts_set_init ( void )
2470{
2471 tl_assert(!vts_set);
2472 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2473 HG_(free),
2474 (Word(*)(UWord,UWord))VTS__cmp_structural );
sewardjf98e1c02008-10-25 16:22:41 +00002475}
2476
sewardj7aa38a92011-02-27 23:04:12 +00002477/* Given a VTS, look in vts_set to see if we already have a
2478 structurally identical one. If yes, return the pair (True, pointer
2479 to the existing one). If no, clone this one, add the clone to the
2480 set, and return (False, pointer to the clone). */
2481static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002482{
2483 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002484 stats__vts_set__focaa++;
2485 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002486 /* lookup cand (by value) */
2487 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2488 /* found it */
2489 tl_assert(valW == 0);
2490 /* if this fails, cand (by ref) was already present (!) */
2491 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002492 *res = (VTS*)keyW;
2493 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002494 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002495 /* not present. Clone, add and return address of clone. */
2496 stats__vts_set__focaa_a++;
2497 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2498 tl_assert(clone != cand);
2499 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2500 *res = clone;
2501 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002502 }
2503}
2504
2505
2506/////////////////////////////////////////////////////////
2507// //
2508// VTS table //
2509// //
2510/////////////////////////////////////////////////////////
2511
2512static void VtsID__invalidate_caches ( void ); /* fwds */
2513
2514/* A type to hold VTS table entries. Invariants:
2515 If .vts == NULL, then this entry is not in use, so:
2516 - .rc == 0
2517 - this entry is on the freelist (unfortunately, does not imply
sewardjffce8152011-06-24 10:09:41 +00002518 any constraints on value for .freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002519 If .vts != NULL, then this entry is in use:
2520 - .vts is findable in vts_set
2521 - .vts->id == this entry number
2522 - no specific value for .rc (even 0 is OK)
sewardjffce8152011-06-24 10:09:41 +00002523 - this entry is not on freelist, so .freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002524*/
2525typedef
2526 struct {
2527 VTS* vts; /* vts, in vts_set */
2528 UWord rc; /* reference count - enough for entire aspace */
2529 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
sewardjffce8152011-06-24 10:09:41 +00002530 VtsID remap; /* used only during pruning */
sewardjf98e1c02008-10-25 16:22:41 +00002531 }
2532 VtsTE;
2533
2534/* The VTS table. */
2535static XArray* /* of VtsTE */ vts_tab = NULL;
2536
2537/* An index into the VTS table, indicating the start of the list of
2538 free (available for use) entries. If the list is empty, this is
2539 VtsID_INVALID. */
2540static VtsID vts_tab_freelist = VtsID_INVALID;
2541
2542/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2543 vts_tab equals or exceeds this size. After GC, the value here is
2544 set appropriately so as to check for the next GC point. */
2545static Word vts_next_GC_at = 1000;
2546
2547static void vts_tab_init ( void )
2548{
florian91ed8cc2014-09-15 18:50:17 +00002549 vts_tab = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2550 HG_(free), sizeof(VtsTE) );
2551 vts_tab_freelist = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002552}
2553
2554/* Add ii to the free list, checking that it looks out-of-use. */
2555static void add_to_free_list ( VtsID ii )
2556{
2557 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2558 tl_assert(ie->vts == NULL);
2559 tl_assert(ie->rc == 0);
2560 tl_assert(ie->freelink == VtsID_INVALID);
2561 ie->freelink = vts_tab_freelist;
2562 vts_tab_freelist = ii;
2563}
2564
2565/* Get an entry from the free list. This will return VtsID_INVALID if
2566 the free list is empty. */
2567static VtsID get_from_free_list ( void )
2568{
2569 VtsID ii;
2570 VtsTE* ie;
2571 if (vts_tab_freelist == VtsID_INVALID)
2572 return VtsID_INVALID;
2573 ii = vts_tab_freelist;
2574 ie = VG_(indexXA)( vts_tab, ii );
2575 tl_assert(ie->vts == NULL);
2576 tl_assert(ie->rc == 0);
2577 vts_tab_freelist = ie->freelink;
2578 return ii;
2579}
2580
2581/* Produce a new VtsID that can be used, either by getting it from
2582 the freelist, or, if that is empty, by expanding vts_tab. */
2583static VtsID get_new_VtsID ( void )
2584{
2585 VtsID ii;
2586 VtsTE te;
2587 ii = get_from_free_list();
2588 if (ii != VtsID_INVALID)
2589 return ii;
2590 te.vts = NULL;
2591 te.rc = 0;
2592 te.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00002593 te.remap = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002594 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2595 return ii;
2596}
2597
2598
2599/* Indirect callback from lib_zsm. */
2600static void VtsID__rcinc ( VtsID ii )
2601{
2602 VtsTE* ie;
2603 /* VG_(indexXA) does a range check for us */
2604 ie = VG_(indexXA)( vts_tab, ii );
2605 tl_assert(ie->vts); /* else it's not in use */
2606 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2607 tl_assert(ie->vts->id == ii);
2608 ie->rc++;
2609}
2610
2611/* Indirect callback from lib_zsm. */
2612static void VtsID__rcdec ( VtsID ii )
2613{
2614 VtsTE* ie;
2615 /* VG_(indexXA) does a range check for us */
2616 ie = VG_(indexXA)( vts_tab, ii );
2617 tl_assert(ie->vts); /* else it's not in use */
2618 tl_assert(ie->rc > 0); /* else RC snafu */
2619 tl_assert(ie->vts->id == ii);
2620 ie->rc--;
2621}
2622
2623
sewardj7aa38a92011-02-27 23:04:12 +00002624/* Look up 'cand' in our collection of VTSs. If present, return the
2625 VtsID for the pre-existing version. If not present, clone it, add
2626 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2627 it, and return that. */
2628static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002629{
sewardj7aa38a92011-02-27 23:04:12 +00002630 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002631 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002632 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2633 tl_assert(in_tab);
2634 if (already_have) {
2635 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002636 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002637 tl_assert(in_tab->id != VtsID_INVALID);
2638 ie = VG_(indexXA)( vts_tab, in_tab->id );
2639 tl_assert(ie->vts == in_tab);
2640 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002641 } else {
2642 VtsID ii = get_new_VtsID();
2643 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002644 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002645 ie->rc = 0;
2646 ie->freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002647 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002648 return ii;
2649 }
2650}
2651
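/* Typical interning flow (a sketch; 'temp' stands for a max-sized
   scratch VTS such as the one introduced at the end of this section,
   and 'thr' for a valid Thr*):

      temp->id = VtsID_INVALID;
      temp->usedTS = 0;
      VTS__singleton(temp, thr, 1);
      VtsID vi = vts_tab__find__or__clone_and_add(temp);

   'temp' itself is never stored; if the VTS was previously unseen, a
   right-sized clone goes into vts_set/vts_tab and 'vi' names it. */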
2652
florian6bd9dc12012-11-23 16:17:43 +00002653static void show_vts_stats ( const HChar* caller )
sewardjf98e1c02008-10-25 16:22:41 +00002654{
2655 UWord nSet, nTab, nLive;
2656 ULong totrc;
2657 UWord n, i;
2658 nSet = VG_(sizeFM)( vts_set );
2659 nTab = VG_(sizeXA)( vts_tab );
2660 totrc = 0;
2661 nLive = 0;
2662 n = VG_(sizeXA)( vts_tab );
2663 for (i = 0; i < n; i++) {
2664 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2665 if (ie->vts) {
2666 nLive++;
2667 totrc += (ULong)ie->rc;
2668 } else {
2669 tl_assert(ie->rc == 0);
2670 }
2671 }
2672 VG_(printf)(" show_vts_stats %s\n", caller);
2673 VG_(printf)(" vts_tab size %4lu\n", nTab);
2674 VG_(printf)(" vts_tab live %4lu\n", nLive);
2675 VG_(printf)(" vts_set size %4lu\n", nSet);
2676 VG_(printf)(" total rc %4llu\n", totrc);
2677}
2678
sewardjffce8152011-06-24 10:09:41 +00002679
2680/* --- Helpers for VtsID pruning --- */
2681
2682static
2683void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2684 /*MOD*/XArray* /* of VtsTE */ new_tab,
2685 VtsID* ii )
2686{
2687 VtsTE *old_te, *new_te;
2688 VtsID old_id, new_id;
2689 /* We're relying here on VG_(indexXA)'s range checking to assert on
2690 any stupid values, in particular *ii == VtsID_INVALID. */
2691 old_id = *ii;
2692 old_te = VG_(indexXA)( old_tab, old_id );
2693 old_te->rc--;
2694 new_id = old_te->remap;
2695 new_te = VG_(indexXA)( new_tab, new_id );
2696 new_te->rc++;
2697 *ii = new_id;
2698}
2699
2700static
2701void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2702 /*MOD*/XArray* /* of VtsTE */ new_tab,
2703 SVal* s )
2704{
2705 SVal old_sv, new_sv;
2706 old_sv = *s;
2707 if (SVal__isC(old_sv)) {
2708 VtsID rMin, wMin;
2709 rMin = SVal__unC_Rmin(old_sv);
2710 wMin = SVal__unC_Wmin(old_sv);
2711 remap_VtsID( old_tab, new_tab, &rMin );
2712 remap_VtsID( old_tab, new_tab, &wMin );
2713 new_sv = SVal__mkC( rMin, wMin );
2714 *s = new_sv;
2715 }
2716}
2717
2718
sewardjf98e1c02008-10-25 16:22:41 +00002719/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002720__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002721static void vts_tab__do_GC ( Bool show_stats )
2722{
2723 UWord i, nTab, nLive, nFreed;
2724
sewardjffce8152011-06-24 10:09:41 +00002725 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002726 /* check this is actually necessary. */
2727 tl_assert(vts_tab_freelist == VtsID_INVALID);
2728
2729 /* empty the caches for partial order checks and binary joins. We
2730 could do better and prune out the entries to be deleted, but it
2731 ain't worth the hassle. */
2732 VtsID__invalidate_caches();
2733
2734 /* First, make the reference counts up to date. */
2735 zsm_flush_cache();
2736
2737 nTab = VG_(sizeXA)( vts_tab );
2738
2739 if (show_stats) {
2740 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2741 show_vts_stats("before GC");
2742 }
2743
sewardjffce8152011-06-24 10:09:41 +00002744 /* Now we can inspect the entire vts_tab. Any entries with zero
2745 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002746 free list, removed from vts_set, and deleted. */
2747 nFreed = 0;
2748 for (i = 0; i < nTab; i++) {
2749 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002750 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002751 VtsTE* te = VG_(indexXA)( vts_tab, i );
2752 if (te->vts == NULL) {
2753 tl_assert(te->rc == 0);
2754 continue; /* already on the free list (presumably) */
2755 }
2756 if (te->rc > 0)
2757 continue; /* in use */
2758 /* Ok, we got one we can free. */
2759 tl_assert(te->vts->id == i);
2760 /* first, remove it from vts_set. */
2761 present = VG_(delFromFM)( vts_set,
2762 &oldK, &oldV, (UWord)te->vts );
2763 tl_assert(present); /* else it isn't in vts_set ?! */
2764 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2765 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2766 /* now free the VTS itself */
2767 VTS__delete(te->vts);
2768 te->vts = NULL;
2769 /* and finally put this entry on the free list */
2770 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2771 add_to_free_list( i );
2772 nFreed++;
2773 }
2774
2775 /* Now figure out when the next GC should be. We'll allow the
2776 number of VTSs to double before GCing again. Except of course
2777 that since we can't (or, at least, don't) shrink vts_tab, we
 2778   can't set the threshold value smaller than it. */
2779 tl_assert(nFreed <= nTab);
2780 nLive = nTab - nFreed;
2781 tl_assert(nLive >= 0 && nLive <= nTab);
2782 vts_next_GC_at = 2 * nLive;
2783 if (vts_next_GC_at < nTab)
2784 vts_next_GC_at = nTab;
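/* Example: if nTab == 1000 and 400 entries were freed, nLive == 600
   and the next GC fires when vts_tab reaches 1200 entries.  If 700
   were freed instead (nLive == 300), 2 * nLive == 600 < nTab, so the
   threshold is clamped up to 1000, the current table size. */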
2785
2786 if (show_stats) {
2787 show_vts_stats("after GC");
2788 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2789 }
2790
sewardj5e2ac3b2009-08-11 10:39:25 +00002791 if (VG_(clo_stats)) {
sewardjffce8152011-06-24 10:09:41 +00002792 static UInt ctr = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002793 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002794 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002795 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002796 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002797 }
sewardjffce8152011-06-24 10:09:41 +00002798 /* ---------- END VTS GC ---------- */
2799
2800 /* Decide whether to do VTS pruning. We have one of three
2801 settings. */
2802 static UInt pruning_auto_ctr = 0; /* do not make non-static */
2803
2804 Bool do_pruning = False;
2805 switch (HG_(clo_vts_pruning)) {
2806 case 0: /* never */
2807 break;
2808 case 1: /* auto */
2809 do_pruning = (++pruning_auto_ctr % 5) == 0;
2810 break;
2811 case 2: /* always */
2812 do_pruning = True;
2813 break;
2814 default:
2815 tl_assert(0);
2816 }
2817
2818 /* The rest of this routine only handles pruning, so we can
2819 quit at this point if it is not to be done. */
2820 if (!do_pruning)
2821 return;
2822
2823 /* ---------- BEGIN VTS PRUNING ---------- */
 2824 /* We begin by sorting the backing table on its ThrID values, so as
2825 to (1) check they are unique [else something has gone wrong,
2826 since it means we must have seen some Thr* exiting more than
2827 once, which can't happen], and (2) so that we can quickly look
2828 up the dead-thread entries as we work through the VTSs. */
2829 VG_(sortXA)( verydead_thread_table );
 2830 /* Sanity check: check for unique ThrID values. */
2831 UWord nBT = VG_(sizeXA)( verydead_thread_table );
2832 if (nBT > 0) {
2833 ThrID thrid1, thrid2;
2834 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
2835 for (i = 1; i < nBT; i++) {
2836 thrid1 = thrid2;
2837 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
2838 tl_assert(thrid1 < thrid2);
2839 }
2840 }
2841 /* Ok, so the dead thread table has unique and in-order keys. */
2842
2843 /* We will run through the old table, and create a new table and
2844 set, at the same time setting the .remap entries in the old
2845 table to point to the new entries. Then, visit every VtsID in
2846 the system, and replace all of them with new ones, using the
2847 .remap entries in the old table. Finally, we can delete the old
2848 table and set. */
2849
2850 XArray* /* of VtsTE */ new_tab
2851 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2852 HG_(free), sizeof(VtsTE) );
2853
2854 /* WordFM VTS* void */
2855 WordFM* new_set
2856 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2857 HG_(free),
2858 (Word(*)(UWord,UWord))VTS__cmp_structural );
2859
2860 /* Visit each old VTS. For each one:
2861
2862 * make a pruned version
2863
2864 * search new_set for the pruned version, yielding either
2865 Nothing (not present) or the new VtsID for it.
2866
2867 * if not present, allocate a new VtsID for it, insert (pruned
2868 VTS, new VtsID) in the tree, and set
2869 remap_table[old VtsID] = new VtsID.
2870
2871 * if present, set remap_table[old VtsID] = new VtsID, where
2872 new VtsID was determined by the tree lookup. Then free up
2873 the clone.
2874 */
2875
2876 UWord nBeforePruning = 0, nAfterPruning = 0;
2877 UWord nSTSsBefore = 0, nSTSsAfter = 0;
2878 VtsID new_VtsID_ctr = 0;
2879
2880 for (i = 0; i < nTab; i++) {
2881
2882 /* For each old VTS .. */
2883 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
2884 VTS* old_vts = old_te->vts;
2885 tl_assert(old_te->remap == VtsID_INVALID);
2886
2887 /* Skip it if not in use */
2888 if (old_te->rc == 0) {
2889 tl_assert(old_vts == NULL);
2890 continue;
2891 }
2892 tl_assert(old_vts != NULL);
2893 tl_assert(old_vts->id == i);
2894 tl_assert(old_vts->ts != NULL);
2895
2896 /* It is in use. Make a pruned version. */
2897 nBeforePruning++;
2898 nSTSsBefore += old_vts->usedTS;
2899 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
2900 old_vts, verydead_thread_table);
2901 tl_assert(new_vts->sizeTS == new_vts->usedTS);
2902 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2903 == 0x0ddC0ffeeBadF00dULL);
2904
2905 /* Get rid of the old VTS and the tree entry. It's a bit more
2906 complex to incrementally delete the VTSs now than to nuke
2907 them all after we're done, but the upside is that we don't
2908 wind up temporarily storing potentially two complete copies
2909 of each VTS and hence spiking memory use. */
2910 UWord oldK = 0, oldV = 12345;
2911 Bool present = VG_(delFromFM)( vts_set,
2912 &oldK, &oldV, (UWord)old_vts );
2913 tl_assert(present); /* else it isn't in vts_set ?! */
2914 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2915 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2916 /* now free the VTS itself */
2917 VTS__delete(old_vts);
2918 old_te->vts = NULL;
2919 old_vts = NULL;
2920
2921 /* NO MENTIONS of old_vts allowed beyond this point. */
2922
2923 /* Ok, we have the pruned copy in new_vts. See if a
2924 structurally identical version is already present in new_set.
2925 If so, delete the one we just made and move on; if not, add
2926 it. */
2927 VTS* identical_version = NULL;
2928 UWord valW = 12345;
2929 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2930 (UWord)new_vts)) {
2931 // already have it
2932 tl_assert(valW == 0);
2933 tl_assert(identical_version != NULL);
2934 tl_assert(identical_version != new_vts);
2935 VTS__delete(new_vts);
2936 new_vts = identical_version;
2937 tl_assert(new_vts->id != VtsID_INVALID);
2938 } else {
2939 tl_assert(valW == 12345);
2940 tl_assert(identical_version == NULL);
2941 new_vts->id = new_VtsID_ctr++;
2942 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2943 tl_assert(!b);
2944 VtsTE new_te;
2945 new_te.vts = new_vts;
2946 new_te.rc = 0;
2947 new_te.freelink = VtsID_INVALID;
2948 new_te.remap = VtsID_INVALID;
2949 Word j = VG_(addToXA)( new_tab, &new_te );
2950 tl_assert(j <= i);
2951 tl_assert(j == new_VtsID_ctr - 1);
2952 // stats
2953 nAfterPruning++;
2954 nSTSsAfter += new_vts->usedTS;
2955 }
2956 old_te->remap = new_vts->id;
2957
2958 } /* for (i = 0; i < nTab; i++) */
2959
2960 /* At this point, we have:
2961 * the old VTS table, with its .remap entries set,
2962 and with all .vts == NULL.
2963 * the old VTS tree should be empty, since it and the old VTSs
 2964      it contained have been incrementally deleted as we worked
2965 through the old table.
2966 * the new VTS table, with all .rc == 0, all .freelink and .remap
2967 == VtsID_INVALID.
2968 * the new VTS tree.
2969 */
2970 tl_assert( VG_(sizeFM)(vts_set) == 0 );
2971
2972 /* Now actually apply the mapping. */
2973 /* Visit all the VtsIDs in the entire system. Where do we expect
2974 to find them?
2975 (a) in shadow memory -- the LineZs and LineFs
2976 (b) in our collection of struct _Thrs.
2977 (c) in our collection of struct _SOs.
2978 Nowhere else, AFAICS. Not in the zsm cache, because that just
2979 got invalidated.
2980
2981 Using the .remap fields in vts_tab, map each old VtsID to a new
2982 VtsID. For each old VtsID, dec its rc; and for each new one,
2983 inc it. This sets up the new refcounts, and it also gives a
2984 cheap sanity check of the old ones: all old refcounts should be
2985 zero after this operation.
2986 */
2987
2988 /* Do the mappings for (a) above: iterate over the Primary shadow
2989 mem map (WordFM Addr SecMap*). */
2990 UWord secmapW = 0;
2991 VG_(initIterFM)( map_shmem );
2992 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
2993 UWord j;
2994 SecMap* sm = (SecMap*)secmapW;
2995 tl_assert(sm->magic == SecMap_MAGIC);
2996 /* Deal with the LineZs */
2997 for (i = 0; i < N_SECMAP_ZLINES; i++) {
2998 LineZ* lineZ = &sm->linesZ[i];
2999 if (lineZ->dict[0] == SVal_INVALID)
3000 continue; /* not in use -- data is in F rep instead */
3001 for (j = 0; j < 4; j++)
3002 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3003 }
3004 /* Deal with the LineFs */
3005 for (i = 0; i < sm->linesF_size; i++) {
3006 LineF* lineF = &sm->linesF[i];
3007 if (!lineF->inUse)
3008 continue;
3009 for (j = 0; j < N_LINE_ARANGE; j++)
3010 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3011 }
3012 }
3013 VG_(doneIterFM)( map_shmem );
3014
3015 /* Do the mappings for (b) above: visit our collection of struct
3016 _Thrs. */
3017 Thread* hgthread = get_admin_threads();
3018 tl_assert(hgthread);
3019 while (hgthread) {
3020 Thr* hbthr = hgthread->hbthr;
3021 tl_assert(hbthr);
3022 /* Threads that are listed in the prunable set have their viR
3023 and viW set to VtsID_INVALID, so we can't mess with them. */
3024 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3025 tl_assert(hbthr->viR == VtsID_INVALID);
3026 tl_assert(hbthr->viW == VtsID_INVALID);
3027 hgthread = hgthread->admin;
3028 continue;
3029 }
3030 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3031 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3032 hgthread = hgthread->admin;
3033 }
3034
3035 /* Do the mappings for (c) above: visit the struct _SOs. */
3036 SO* so = admin_SO;
3037 while (so) {
3038 if (so->viR != VtsID_INVALID)
3039 remap_VtsID( vts_tab, new_tab, &so->viR );
3040 if (so->viW != VtsID_INVALID)
3041 remap_VtsID( vts_tab, new_tab, &so->viW );
3042 so = so->admin_next;
3043 }
3044
3045 /* So, we're nearly done (with this incredibly complex operation).
3046 Check the refcounts for the old VtsIDs all fell to zero, as
3047 expected. Any failure is serious. */
3048 for (i = 0; i < nTab; i++) {
3049 VtsTE* te = VG_(indexXA)( vts_tab, i );
3050 tl_assert(te->vts == NULL);
3051 /* This is the assert proper. Note we're also asserting
3052 zeroness for old entries which are unmapped (hence have
3053 .remap == VtsID_INVALID). That's OK. */
3054 tl_assert(te->rc == 0);
3055 }
3056
3057 /* Install the new table and set. */
3058 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3059 vts_set = new_set;
3060 VG_(deleteXA)( vts_tab );
3061 vts_tab = new_tab;
3062
3063 /* The freelist of vts_tab entries is empty now, because we've
3064 compacted all of the live entries at the low end of the
3065 table. */
3066 vts_tab_freelist = VtsID_INVALID;
3067
3068 /* Sanity check vts_set and vts_tab. */
3069
3070 /* Because all the live entries got slid down to the bottom of vts_tab: */
3071 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3072
3073 /* Assert that the vts_tab and vts_set entries point at each other
3074 in the required way */
3075 UWord wordK = 0, wordV = 0;
3076 VG_(initIterFM)( vts_set );
3077 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3078 tl_assert(wordK != 0);
3079 tl_assert(wordV == 0);
3080 VTS* vts = (VTS*)wordK;
3081 tl_assert(vts->id != VtsID_INVALID);
3082 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3083 tl_assert(te->vts == vts);
3084 }
3085 VG_(doneIterFM)( vts_set );
3086
3087 /* Also iterate over the table, and check each entry is
3088 plausible. */
3089 nTab = VG_(sizeXA)( vts_tab );
3090 for (i = 0; i < nTab; i++) {
3091 VtsTE* te = VG_(indexXA)( vts_tab, i );
3092 tl_assert(te->vts);
3093 tl_assert(te->vts->id == i);
3094 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3095 tl_assert(te->freelink == VtsID_INVALID); /* in use */
3096 tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3097 }
3098
3099 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3100 if (VG_(clo_stats)) {
3101 static UInt ctr = 1;
3102 tl_assert(nTab > 0);
3103 VG_(message)(
3104 Vg_DebugMsg,
3105 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3106 "after %lu (avg sz %lu)\n",
3107 ctr++,
3108 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3109 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3110 );
3111 }
3112 if (0)
3113 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
3114 "after pruning %lu (avg sz %lu)\n",
3115 nBeforePruning, nSTSsBefore / nBeforePruning,
3116 nAfterPruning, nSTSsAfter / nAfterPruning);
3117 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003118}
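
/* Illustrative sketch only, never called: this is the behaviour that
   the remap step above relies on remap_VtsID (defined earlier in this
   file) to have.  The names and details below paraphrase the comments
   above rather than copying the real implementation: follow the
   .remap field of the old table entry, transfer one unit of refcount
   from the old entry to the new one, and hand back the new VtsID. */
__attribute__((unused))
static VtsID example_remap_one_VtsID ( /*MOD*/XArray* old_tab,
                                       /*MOD*/XArray* new_tab,
                                       VtsID old )
{
   VtsTE* old_te = VG_(indexXA)( old_tab, old );
   VtsID  nyu    = old_te->remap;
   tl_assert(nyu != VtsID_INVALID); /* 'old' must be a mapped entry */
   tl_assert(old_te->rc > 0);       /* it was referenced somewhere */
   old_te->rc--;                    /* dec the old rc ... */
   VtsTE* new_te = VG_(indexXA)( new_tab, nyu );
   new_te->rc++;                    /* ... inc the new rc, as above */
   return nyu;
}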


/////////////////////////////////////////////////////////
//                                                     //
// Vts IDs                                             //
//                                                     //
/////////////////////////////////////////////////////////

//////////////////////////
/* A max-sized VTS, used as scratch space (the first argument) in
   VTS__singleton, VTS__tick and VTS__join operations. */
static VTS* temp_max_sized_VTS = NULL;
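
/* Usage pattern for the scratch VTS, as seen in VtsID__join2_WRK and
   friends below: reset its length with temp_max_sized_VTS->usedTS = 0,
   build the result into it, then intern the result via
   vts_tab__find__or__clone_and_add, which (as its name suggests)
   clones it into the table if an identical VTS is not already
   present.  The scratch buffer itself is therefore never stored
   anywhere. */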

//////////////////////////
static ULong stats__cmpLEQ_queries = 0;
static ULong stats__cmpLEQ_misses  = 0;
static ULong stats__join2_queries  = 0;
static ULong stats__join2_misses   = 0;

static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}
static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   return hash % nTab;
}
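
/* Worked example (illustrative, not from the original source): with
   vi1 = 5 and vi2 = 9, ROL32(5,19) = 0x280000 and ROL32(9,13) =
   0x12000, so the XOR is 0x292000 = 2695168; with nTab =
   N_CMPLEQ_CACHE = 1023 (defined just below), the pair lands in
   cache slot 2695168 % 1023 = 586.  The differing rotations make the
   hash asymmetric, so (vi1,vi2) and (vi2,vi1) normally hash to
   different slots. */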

#define N_CMPLEQ_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; Bool leq; }
   cmpLEQ_cache[N_CMPLEQ_CACHE];

#define N_JOIN2_CACHE 1023
static
   struct { VtsID vi1; VtsID vi2; VtsID res; }
   join2_cache[N_JOIN2_CACHE];

static void VtsID__invalidate_caches ( void ) {
   Int i;
   for (i = 0; i < N_CMPLEQ_CACHE; i++) {
      cmpLEQ_cache[i].vi1 = VtsID_INVALID;
      cmpLEQ_cache[i].vi2 = VtsID_INVALID;
      cmpLEQ_cache[i].leq = False;
   }
   for (i = 0; i < N_JOIN2_CACHE; i++) {
      join2_cache[i].vi1 = VtsID_INVALID;
      join2_cache[i].vi2 = VtsID_INVALID;
      join2_cache[i].res = VtsID_INVALID;
   }
}
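
/* Both caches are keyed on VtsID values, so they must be invalidated
   (as above) whenever VtsIDs are recycled or remapped -- in
   particular around the vts_tab GC/pruning passes earlier in this
   file, which reassign the IDs wholesale.  Stuffing VtsID_INVALID
   into the key fields guarantees that no subsequent lookup can
   hit. */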
//////////////////////////

//static Bool VtsID__is_valid ( VtsID vi ) {
//   VtsTE* ve;
//   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
//      return False;
//   ve = VG_(indexXA)( vts_tab, vi );
//   if (!ve->vts)
//      return False;
//   tl_assert(ve->vts->id == vi);
//   return True;
//}

static VTS* VtsID__to_VTS ( VtsID vi ) {
   VtsTE* te = VG_(indexXA)( vts_tab, vi );
   tl_assert(te->vts);
   return te->vts;
}

static void VtsID__pp ( VtsID vi ) {
   VTS* vts = VtsID__to_VTS(vi);
   VTS__show( vts );
}

/* compute the partial ordering relation of vi1 and vi2. */
__attribute__((noinline))
static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt hash;
   Bool leq;
   VTS  *v1, *v2;
   //if (vi1 == vi2) return True;
   tl_assert(vi1 != vi2);
   ////++
   stats__cmpLEQ_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   if (cmpLEQ_cache[hash].vi1 == vi1
       && cmpLEQ_cache[hash].vi2 == vi2)
      return cmpLEQ_cache[hash].leq;
   stats__cmpLEQ_misses++;
   ////--
   v1 = VtsID__to_VTS(vi1);
   v2 = VtsID__to_VTS(vi2);
   leq = VTS__cmpLEQ( v1, v2 ) == 0;
   ////++
   cmpLEQ_cache[hash].vi1 = vi1;
   cmpLEQ_cache[hash].vi2 = vi2;
   cmpLEQ_cache[hash].leq = leq;
   ////--
   return leq;
}
static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
}

/* compute binary join */
__attribute__((noinline))
static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   UInt  hash;
   VtsID res;
   VTS   *vts1, *vts2;
   //if (vi1 == vi2) return vi1;
   tl_assert(vi1 != vi2);
   ////++
   stats__join2_queries++;
   hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   if (join2_cache[hash].vi1 == vi1
       && join2_cache[hash].vi2 == vi2)
      return join2_cache[hash].res;
   stats__join2_misses++;
   ////--
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   temp_max_sized_VTS->usedTS = 0;
   VTS__join(temp_max_sized_VTS, vts1,vts2);
   res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   ////++
   join2_cache[hash].vi1 = vi1;
   join2_cache[hash].vi2 = vi2;
   join2_cache[hash].res = res;
   ////--
   return res;
}
static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
}

/* create a singleton VTS, namely [thr:1] */
static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   temp_max_sized_VTS->usedTS = 0;
   VTS__singleton(temp_max_sized_VTS, thr,tym);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* tick operation, creates value 1 if specified index is absent */
static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   temp_max_sized_VTS->usedTS = 0;
   VTS__tick(temp_max_sized_VTS, idx,vts);
   return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
}

/* index into a VTS (only for assertions) */
static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   VTS* vts = VtsID__to_VTS(vi);
   return VTS__indexAt_SLOW( vts, idx );
}

/* Assuming that !cmpLEQ(vi1, vi2), find the first (or any, really)
   element in vi1 which is pointwise greater-than the corresponding
   element in vi2, and return the Thr it belongs to.  Given the
   assumption, such an element must exist, so this never returns
   NULL.  This needs to be fairly quick since it is called every time
   a race is detected. */
static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
{
   VTS   *vts1, *vts2;
   Thr*  diffthr;
   ThrID diffthrid;
   tl_assert(vi1 != vi2);
   vts1 = VtsID__to_VTS(vi1);
   vts2 = VtsID__to_VTS(vi2);
   tl_assert(vts1 != vts2);
   diffthrid = VTS__cmpLEQ(vts1, vts2);
   diffthr = Thr__from_ThrID(diffthrid);
   tl_assert(diffthr); /* else they are LEQ ! */
   return diffthr;
}
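
/* Note the double duty that VTS__cmpLEQ's return value performs in
   the two callers above: VtsID__cmpLEQ_WRK treats a zero return as
   "vi1 <= vi2 pointwise", whereas VtsID__findFirst_notLEQ uses a
   nonzero return as the ThrID of a witness element that breaks the
   ordering. */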


/////////////////////////////////////////////////////////
//                                                     //
// Filters                                             //
//                                                     //
/////////////////////////////////////////////////////////

/* Forget everything we know -- clear the filter and let everything
   through.  This needs to be as fast as possible, since it is called
   every time the running thread changes, and every time a thread's
   vector clocks change, which can be quite frequent.  The obvious
   fast way to do this is simply to stuff in tags which we know are
   not going to match anything, since they're not aligned to the start
   of a line. */
static void Filter__clear ( Filter* fi, const HChar* who )
{
   UWord i;
   if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
   for (i = 0; i < FI_NUM_LINES; i += 8) {
      fi->tags[i+0] = 1; /* impossible value -- cannot match */
      fi->tags[i+1] = 1;
      fi->tags[i+2] = 1;
      fi->tags[i+3] = 1;
      fi->tags[i+4] = 1;
      fi->tags[i+5] = 1;
      fi->tags[i+6] = 1;
      fi->tags[i+7] = 1;
   }
   tl_assert(i == FI_NUM_LINES);
}

/* Clearing an arbitrary range in the filter.  Unfortunately
   we have to do this due to core-supplied new/die-mem events. */

static void Filter__clear_1byte ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   UShort  mask   = 0x3 << (2 * (a & 7));
   /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   if (LIKELY( fi->tags[lineno] == atag )) {
      /* hit.  clear the bits. */
      UShort u16 = line->u16s[loff];
      line->u16s[loff] = u16 & ~mask; /* clear them */
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
{
   Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   FiLine* line   = &fi->lines[lineno];
   UWord   loff   = (a - atag) / 8;
   if (LIKELY( fi->tags[lineno] == atag )) {
      line->u16s[loff] = 0;
   } else {
      /* miss.  The filter doesn't hold this address, so ignore. */
   }
}

static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
{
   //VG_(printf)("%lu ", len);
   /* slowly do part preceding 8-alignment */
   while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
   /* vector loop */
   while (len >= 8) {
      Filter__clear_8bytes_aligned( fi, a );
      a += 8;
      len -= 8;
   }
   /* slowly do tail */
   while (UNLIKELY(len > 0)) {
      Filter__clear_1byte( fi, a );
      a++;
      len--;
   }
}
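
/* Worked example (illustrative, not from the original source):
   clearing [0x1005, 0x1015) -- len 16, a % 8 == 5 -- does three
   1-byte clears (0x1005..0x1007), one aligned 8-byte clear at
   0x1008, and five 1-byte tail clears (0x1010..0x1014).  So the
   fast middle loop only dominates for larger, mostly-aligned
   ranges. */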


/* ------ Read handlers for the filter. ------ */
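
/* How to read the masks below (a summary inferred from the handlers
   themselves, not a comment carried over from the original): each
   FiLine UShort covers 8 bytes of memory, two bits per byte.  The
   higher bit of each pair records "a read of this byte may be
   skipped", the lower one "a write may be skipped".  Read handlers
   test and set only the read bits (e.g. 0xAAAA for all 8 bytes);
   write handlers test both bits but also set both (e.g. 0xFFFF),
   since a filterable write implies that subsequent reads are
   filterable too. */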

static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xAAAA;
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* all R bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 4 x R bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xA << (2 * (a & 6));
     /* mask is A000, 0A00, 00A0 or 000A */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 2 x R bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
{
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0x2 << (2 * (a & 7));
     /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 1 x R bit set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}


/* ------ Write handlers for the filter. ------ */

static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xFFFF;
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* all R & W bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 4 x R & W bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
{
   if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
      return False;
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0xF << (2 * (a & 6));
     /* mask is F000, 0F00, 00F0 or 000F */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 2 x R & W bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}

static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
{
   {
     Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
     UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
     FiLine* line   = &fi->lines[lineno];
     UWord   loff   = (a - atag) / 8;
     UShort  mask   = 0x3 << (2 * (a & 7));
     /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
     if (LIKELY( fi->tags[lineno] == atag )) {
        /* hit.  check line and update. */
        UShort u16 = line->u16s[loff];
        Bool   ok  = (u16 & mask) == mask; /* 1 x R & W bits set? */
        line->u16s[loff] = u16 | mask; /* set them */
        return ok;
     } else {
        /* miss.  nuke existing line and re-use it. */
        UWord i;
        fi->tags[lineno] = atag;
        for (i = 0; i < FI_LINE_SZB / 8; i++)
           line->u16s[i] = 0;
        line->u16s[loff] = mask;
        return False;
     }
   }
}


/////////////////////////////////////////////////////////
//                                                     //
// Threads                                             //
//                                                     //
/////////////////////////////////////////////////////////

/* Maps ThrID values to their Thr*s (which contain ThrID values that
   should point back to the relevant slot in the array).  Lowest
   numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
static XArray* /* of Thr* */ thrid_to_thr_map = NULL;

/* And a counter to dole out ThrID values.  For rationale/background,
   see comments on definition of ScalarTS (far) above. */
static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */

static ThrID Thr__to_ThrID ( Thr* thr ) {
   return thr->thrid;
}
static Thr* Thr__from_ThrID ( UInt thrid ) {
   Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   tl_assert(thr->thrid == thrid);
   return thr;
}

static Thr* Thr__new ( void )
{
   Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
   thr->llexit_done = False;
   thr->joinedwith_done = False;
   thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   if (HG_(clo_history_level) == 1)
      thr->local_Kws_n_stacks
         = VG_(newXA)( HG_(zalloc),
                       "libhb.Thr__new.3 (local_Kws_and_stacks)",
                       HG_(free), sizeof(ULong_n_EC) );

   /* Add this Thr* <-> ThrID binding to the mapping, and
      cross-check */
   if (!thrid_to_thr_map) {
      thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
                                     HG_(free), sizeof(Thr*) );
   }

   if (thrid_counter >= ThrID_MAX_VALID) {
      /* We're hosed.  We have to stop. */
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   }

   thr->thrid = thrid_counter++;
   Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   tl_assert(ix + 1024 == thr->thrid);

   return thr;
}

static void note_local_Kw_n_stack_for ( Thr* thr )
{
   Word       nPresent;
   ULong_n_EC pair;
   tl_assert(thr);

   // We only collect this info at history level 1 (approx)
   if (HG_(clo_history_level) != 1)
      return;

   /* This is the scalar Kw for thr. */
   pair.ull = VtsID__indexAt( thr->viW, thr );
   pair.ec  = main_get_EC( thr );
   tl_assert(pair.ec);
   tl_assert(thr->local_Kws_n_stacks);

   /* check that we're not adding duplicates */
   nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );

   /* Throw away old stacks, if necessary.  We can't accumulate stuff
      indefinitely. */
   if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
      VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
      nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
      if (0)
         VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
                     thr, pair.ull, pair.ec );
   }

   if (nPresent > 0) {
      ULong_n_EC* prevPair
         = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
      tl_assert( prevPair->ull <= pair.ull );
   }

   if (nPresent == 0)
      pair.ec = NULL;

   VG_(addToXA)( thr->local_Kws_n_stacks, &pair );

   if (0)
      VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
                  thr, pair.ull, pair.ec );
   if (0)
      VG_(pp_ExeContext)(pair.ec);
}

static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
                                       const ULong_n_EC* pair2 )
{
   if (pair1->ull < pair2->ull) return -1;
   if (pair1->ull > pair2->ull) return 1;
   return 0;
}


/////////////////////////////////////////////////////////
//                                                     //
// Shadow Values                                       //
//                                                     //
/////////////////////////////////////////////////////////

// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
// hb_zsm.h.  We have to do everything else here.

/* SVal is 64 bit unsigned int.

      <---------30--------->    <---------30--------->
   00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   11 0--------------------0 00 0--------------------0   A: SVal_INVALID

*/
#define SVAL_TAGMASK (3ULL << 62)

static inline Bool SVal__isC ( SVal s ) {
   return (0ULL << 62) == (s & SVAL_TAGMASK);
}
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   //tl_assert(VtsID__is_valid(rmini));
   //tl_assert(VtsID__is_valid(wmini));
   return (((ULong)rmini) << 32) | ((ULong)wmini);
}
static inline VtsID SVal__unC_Rmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s >> 32);
}
static inline VtsID SVal__unC_Wmin ( SVal s ) {
   tl_assert(SVal__isC(s));
   return (VtsID)(s & 0xFFFFFFFFULL);
}

static inline Bool SVal__isA ( SVal s ) {
   return (2ULL << 62) == (s & SVAL_TAGMASK);
}
__attribute__((unused))
static inline SVal SVal__mkA ( void ) {
   return 2ULL << 62;
}

/* Direct callback from lib_zsm. */
static void SVal__rcinc ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcinc( SVal__unC_Rmin(s) );
      VtsID__rcinc( SVal__unC_Wmin(s) );
   }
}

/* Direct callback from lib_zsm. */
static void SVal__rcdec ( SVal s ) {
   if (SVal__isC(s)) {
      VtsID__rcdec( SVal__unC_Rmin(s) );
      VtsID__rcdec( SVal__unC_Wmin(s) );
   }
}
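
/* Worked example (illustrative): SVal__mkC(3, 5) yields
   0x0000000300000005 -- tag bits 63:62 are 00, so SVal__isC is true,
   and SVal__unC_Rmin / SVal__unC_Wmin recover 3 and 5.  This also
   shows why VtsIDs must stay below 2^30: the top two bits of each
   32-bit half are reserved for the tag encoding in the diagram
   above. */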


/////////////////////////////////////////////////////////
//                                                     //
// Change-event map2                                   //
//                                                     //
/////////////////////////////////////////////////////////

#define EVENT_MAP_GC_DISCARD_FRACTION 0.5

/* This is in two parts:

   1. A hash table of RCECs.  This is a set of reference-counted stack
      traces.  When the reference count of a stack trace becomes zero,
      it is removed from the set and freed up.  The intent is to have
      a set of stack traces which can be referred to from (2), but to
      only represent each one once.  The set is indexed/searched by
      ordering on the stack trace vectors.

   2. A SparseWA of OldRefs.  These store information about each old
      ref that we need to record.  It is indexed by address of the
      location for which the information is recorded.  For LRU
      purposes, each OldRef also contains a generation number,
      indicating when it was most recently accessed.

      The important part of an OldRef is, however, its accs[] array.
      This is an array of N_OLDREF_ACCS which binds (thread, R/W,
      size) triples to RCECs.  This allows us to collect the last
      access-traceback by up to N_OLDREF_ACCS different triples for
      this location.  The accs[] array is a MTF-array.  If a binding
      falls off the end, that's too bad -- we will lose info about
      that triple's access to this location.

   When the SparseWA becomes too big, we can throw away the OldRefs
   whose generation numbers are below some threshold; hence doing
   approximate LRU discarding.  For each discarded OldRef we must
   of course decrement the reference count on all the RCECs it
   refers to, in order that entries from (1) eventually get
   discarded too.

   A major improvement in reliability of this mechanism would be to
   have a dynamically sized OldRef.accs[] array, so no entries ever
   fall off the end.  In investigations (Dec 08) it appears that a
   major cause for the non-availability of conflicting-access traces
   in race reports is caused by the fixed size of this array.  I
   suspect for most OldRefs, only a few entries are used, but for a
   minority of cases there is an overflow, leading to info lossage.
   Investigations also suggest this is very workload and scheduling
   sensitive.  Therefore a dynamic sizing would be better.

   However, dynamic sizing would defeat the use of a PoolAllocator
   for OldRef structures.  And that's important for performance.  So
   it's not straightforward to do.
*/


static UWord stats__ctxt_rcdec1 = 0;
static UWord stats__ctxt_rcdec2 = 0;
static UWord stats__ctxt_rcdec3 = 0;
static UWord stats__ctxt_rcdec_calls = 0;
static UWord stats__ctxt_rcdec_discards = 0;
static UWord stats__ctxt_rcdec1_eq = 0;

static UWord stats__ctxt_tab_curr = 0;
static UWord stats__ctxt_tab_max  = 0;

static UWord stats__ctxt_tab_qs   = 0;
static UWord stats__ctxt_tab_cmps = 0;


///////////////////////////////////////////////////////
//// Part (1): A hash table of RCECs
///

#define N_FRAMES 8

// (UInt) `echo "Reference Counted Execution Context" | md5sum`
#define RCEC_MAGIC 0xab88abb2UL

//#define N_RCEC_TAB 98317 /* prime */
#define N_RCEC_TAB 196613 /* prime */

typedef
   struct _RCEC {
      UWord magic;  /* sanity check only */
      struct _RCEC* next;
      UWord rc;
      UWord rcX; /* used for crosschecking */
      UWord frames_hash; /* hash of all the frames */
      UWord frames[N_FRAMES];
   }
   RCEC;

static RCEC** contextTab = NULL; /* hash table of RCEC*s */


/* Gives an arbitrary total order on RCEC .frames fields */
static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   Word i;
   tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   if (ec1->frames_hash < ec2->frames_hash) return -1;
   if (ec1->frames_hash > ec2->frames_hash) return 1;
   for (i = 0; i < N_FRAMES; i++) {
      if (ec1->frames[i] < ec2->frames[i]) return -1;
      if (ec1->frames[i] > ec2->frames[i]) return 1;
   }
   return 0;
}


/* Dec the ref of this RCEC. */
static void ctxt__rcdec ( RCEC* ec )
{
   stats__ctxt_rcdec_calls++;
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   tl_assert(ec->rc > 0);
   ec->rc--;
}

static void ctxt__rcinc ( RCEC* ec )
{
   tl_assert(ec && ec->magic == RCEC_MAGIC);
   ec->rc++;
}


//////////// BEGIN RCEC pool allocator
static PoolAlloc* rcec_pool_allocator;

static RCEC* alloc_RCEC ( void ) {
   return VG_(allocEltPA) ( rcec_pool_allocator );
}

static void free_RCEC ( RCEC* rcec ) {
   tl_assert(rcec->magic == RCEC_MAGIC);
   VG_(freeEltPA)( rcec_pool_allocator, rcec );
}
//////////// END RCEC pool allocator


/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   move it one step closer to the front of the list, so as to make
   subsequent searches for it cheaper. */
static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
{
   RCEC *ec0, *ec1, *ec2;
   if (ec == *headp)
      tl_assert(0); /* already at head of list */
   tl_assert(ec != NULL);
   ec0 = *headp;
   ec1 = NULL;
   ec2 = NULL;
   while (True) {
      if (ec0 == NULL || ec0 == ec) break;
      ec2 = ec1;
      ec1 = ec0;
      ec0 = ec0->next;
   }
   tl_assert(ec0 == ec);
   if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
      RCEC* tmp;
      /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
         predecessor.  Swap ec0 and ec1, that is, move ec0 one step
         closer to the start of the list. */
      tl_assert(ec2->next == ec1);
      tl_assert(ec1->next == ec0);
      tmp = ec0->next;
      ec2->next = ec0;
      ec0->next = ec1;
      ec1->next = tmp;
   }
   else
   if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
      /* it's second in the list. */
      tl_assert(*headp == ec1);
      tl_assert(ec1->next == ec0);
      ec1->next = ec0->next;
      ec0->next = ec1;
      *headp = ec0;
   }
}
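
/* Illustrative example of the move-to-front step above: for a chain
   A -> B -> C -> D, promoting C swaps it with its predecessor,
   giving A -> C -> B -> D; promoting the second element (B in
   A -> B -> ...) makes it the new head.  Repeated lookups therefore
   migrate hot RCECs gradually towards the front of their hash
   chain. */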


/* Find the given RCEC in the tree, and return a pointer to it.  Or,
   if not present, add the given one to the tree (by making a copy of
   it, so the caller can immediately deallocate the original) and
   return a pointer to the copy.  The caller can safely have 'example'
   on its stack, since we will always return a pointer to a copy of
   it, not to the original.  Note that the inserted node will have .rc
   of zero and so the caller must immediately increment it. */
__attribute__((noinline))
static RCEC* ctxt__find_or_add ( RCEC* example )
{
   UWord hent;
   RCEC* copy;
   tl_assert(example && example->magic == RCEC_MAGIC);
   tl_assert(example->rc == 0);

   /* Search the hash table to see if we already have it. */
   stats__ctxt_tab_qs++;
   hent = example->frames_hash % N_RCEC_TAB;
   copy = contextTab[hent];
   while (1) {
      if (!copy) break;
      tl_assert(copy->magic == RCEC_MAGIC);
      stats__ctxt_tab_cmps++;
      if (0 == RCEC__cmp_by_frames(copy, example)) break;
      copy = copy->next;
   }

   if (copy) {
      tl_assert(copy != example);
      /* optimisation: if it's not at the head of its list, move 1
         step fwds, to make future searches cheaper */
      if (copy != contextTab[hent]) {
         move_RCEC_one_step_forward( &contextTab[hent], copy );
      }
   } else {
      copy = alloc_RCEC();
      tl_assert(copy != example);
      *copy = *example;
      copy->next = contextTab[hent];
      contextTab[hent] = copy;
      stats__ctxt_tab_curr++;
      if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
         stats__ctxt_tab_max = stats__ctxt_tab_curr;
   }
   return copy;
}

static inline UWord ROLW ( UWord w, Int n )
{
   Int bpw = 8 * sizeof(UWord);
   w = (w << n) | (w >> (bpw-n));
   return w;
}

__attribute__((noinline))
static RCEC* get_RCEC ( Thr* thr )
{
   UWord hash, i;
   RCEC  example;
   example.magic = RCEC_MAGIC;
   example.rc = 0;
   example.rcX = 0;
   example.next = NULL;
   main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   hash = 0;
   for (i = 0; i < N_FRAMES; i++) {
      hash ^= example.frames[i];
      hash = ROLW(hash, 19);
   }
   example.frames_hash = hash;
   return ctxt__find_or_add( &example );
}

///////////////////////////////////////////////////////
//// Part (2):
///  A SparseWA guest-addr -> OldRef, that refers to (1)
///

// (UInt) `echo "Old Reference Information" | md5sum`
#define OldRef_MAGIC 0x30b1f075UL

/* Records an access: a thread, a context (size & writeness) and the
   set of locks held in write mode at the time.  The size (1,2,4,8)
   is encoded as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
*/
typedef
   struct {
      RCEC*     rcec;
      WordSetID locksHeldW;
      UInt      thrid  : SCALARTS_N_THRBITS;
      UInt      szLg2B : 2;
      UInt      isW    : 1;
   }
   Thr_n_RCEC;

#define N_OLDREF_ACCS 5

typedef
   struct {
      UWord magic; /* sanity check only */
      UWord gen;   /* when most recently accessed */
                   /* or free list when not in use */
      /* unused slots in this array have .thrid == 0, which is invalid */
      Thr_n_RCEC accs[N_OLDREF_ACCS];
   }
   OldRef;


//////////// BEGIN OldRef pool allocator
static PoolAlloc* oldref_pool_allocator;

static OldRef* alloc_OldRef ( void ) {
   return VG_(allocEltPA) ( oldref_pool_allocator );
}

static void free_OldRef ( OldRef* r ) {
   tl_assert(r->magic == OldRef_MAGIC);
   VG_(freeEltPA)( oldref_pool_allocator, r );
}
//////////// END OldRef pool allocator


static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
static UWord     oldrefGen      = 0;    /* current LRU generation # */
static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
static UWord     oldrefGenIncAt = 0;    /* inc gen # when size hits this */

inline static UInt min_UInt ( UInt a, UInt b ) {
   return a < b ? a : b;
}

/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
/* Copied from exp-ptrcheck/sg_main.c */
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
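
/* Worked examples (illustrative): cmp_nonempty_intervals(0, 4, 8, 8)
   is -1 since [0,4) lies entirely below [8,16); with (8, 4, 10, 4)
   it is 0, because [8,12) and [10,14) share bytes 10 and 11; and
   with (16, 2, 8, 8) it is 1, since [16,18) starts exactly where
   [8,16) ends. */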

static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
{
   OldRef* ref;
   RCEC*   rcec;
   Word    i, j;
   UWord   keyW, valW;
   Bool    b;

   tl_assert(thr);
   ThrID thrid = thr->thrid;
   tl_assert(thrid != 0); /* zero is used to denote an empty slot. */

   WordSetID locksHeldW = thr->hgthread->locksetW;

   rcec = get_RCEC( thr );
   ctxt__rcinc(rcec);

   UInt szLg2B = 0;
   switch (szB) {
      /* This doesn't look particularly branch-predictor friendly. */
      case 1:  szLg2B = 0; break;
      case 2:  szLg2B = 1; break;
      case 4:  szLg2B = 2; break;
      case 8:  szLg2B = 3; break;
      default: tl_assert(0);
   }

   /* Look in the map to see if we already have a record for this
      address. */
   b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );

   if (b) {

      /* We already have a record for this address.  We now need to
         see if we have a stack trace pertaining to this (thrid, R/W,
         size) triple. */
      tl_assert(keyW == a);
      ref = (OldRef*)valW;
      tl_assert(ref->magic == OldRef_MAGIC);

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         if (ref->accs[i].thrid != thrid)
            continue;
         if (ref->accs[i].szLg2B != szLg2B)
            continue;
         if (ref->accs[i].isW != (UInt)(isW & 1))
            continue;
         /* else we have a match, so stop looking. */
         break;
      }

      if (i < N_OLDREF_ACCS) {
         /* thread 'thr' has an entry at index 'i'.  Update its RCEC. */
         if (i > 0) {
            Thr_n_RCEC tmp = ref->accs[i-1];
            ref->accs[i-1] = ref->accs[i];
            ref->accs[i] = tmp;
            i--;
         }
         if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
         stats__ctxt_rcdec1++;
         ctxt__rcdec( ref->accs[i].rcec );
         tl_assert(ref->accs[i].thrid == thrid);
         /* Update the RCEC and the W-held lockset. */
         ref->accs[i].rcec       = rcec;
         ref->accs[i].locksHeldW = locksHeldW;
      } else {
         /* No entry for this (thread, R/W, size, nWHeld) quad.
            Shuffle all of them down one slot, and put the new entry
            at the start of the array. */
         if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
            /* the last slot is in use.  We must dec the rc on the
               associated rcec. */
            tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
            stats__ctxt_rcdec2++;
            if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
               VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
            ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
         } else {
            tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
         }
         for (j = N_OLDREF_ACCS-1; j >= 1; j--)
            ref->accs[j] = ref->accs[j-1];
         ref->accs[0].thrid      = thrid;
         ref->accs[0].szLg2B     = szLg2B;
         ref->accs[0].isW        = (UInt)(isW & 1);
         ref->accs[0].locksHeldW = locksHeldW;
         ref->accs[0].rcec       = rcec;
         /* thrid==0 is used to signify an empty slot, so we can't
            add zero thrid (such a ThrID is invalid anyway). */
         /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
      }

      ref->gen = oldrefGen;

   } else {

      /* We don't have a record for this address.  Create a new one. */
      if (oldrefTreeN >= oldrefGenIncAt) {
         oldrefGen++;
         oldrefGenIncAt = oldrefTreeN + 50000;
         if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
                            oldrefGen, oldrefTreeN );
      }

      ref = alloc_OldRef();
      ref->magic = OldRef_MAGIC;
      ref->gen   = oldrefGen;
      ref->accs[0].thrid      = thrid;
      ref->accs[0].szLg2B     = szLg2B;
      ref->accs[0].isW        = (UInt)(isW & 1);
      ref->accs[0].locksHeldW = locksHeldW;
      ref->accs[0].rcec       = rcec;

      /* thrid==0 is used to signify an empty slot, so we can't
         add zero thrid (such a ThrID is invalid anyway). */
      /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */

      /* Clear out the rest of the entries */
      for (j = 1; j < N_OLDREF_ACCS; j++) {
         ref->accs[j].rcec       = NULL;
         ref->accs[j].thrid      = 0;
         ref->accs[j].szLg2B     = 0;
         ref->accs[j].isW        = 0;
         ref->accs[j].locksHeldW = 0;
      }
      VG_(addToSWA)( oldrefTree, a, (UWord)ref );
      oldrefTreeN++;

   }
}
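
/* Note on the accs[] move-to-front discipline implemented above: a
   matching triple found at index i > 0 is swapped one step towards
   slot 0 on each hit (not moved straight to the front), so
   persistently hot triples converge on the low indices while a
   one-off access cannot immediately evict the previous favourite
   from slot 0. */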


/* Extract info from the conflicting-access machinery. */
Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
                              /*OUT*/Thr**        resThr,
                              /*OUT*/SizeT*       resSzB,
                              /*OUT*/Bool*        resIsW,
                              /*OUT*/WordSetID*   locksHeldW,
                              Thr* thr, Addr a, SizeT szB, Bool isW )
{
   Word    i, j;
   OldRef* ref;
   UWord   keyW, valW;
   Bool    b;

   ThrID     cand_thrid;
   RCEC*     cand_rcec;
   Bool      cand_isW;
   SizeT     cand_szB;
   WordSetID cand_locksHeldW;
   Addr      cand_a;

   Addr toCheck[15];
   Int  nToCheck = 0;

   tl_assert(thr);
   tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);

   ThrID thrid = thr->thrid;

   toCheck[nToCheck++] = a;
   for (i = -7; i < (Word)szB; i++) {
      if (i != 0)
         toCheck[nToCheck++] = a + i;
   }
   tl_assert(nToCheck <= 15);
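   /* Why 15 slots and the range [a-7 .. a+szB-1]: recorded accesses
      are keyed by their start address, and the largest recorded
      access is 8 bytes, so any access overlapping [a, a+szB) must
      start no lower than a-7 and no higher than a+szB-1.  With the
      worst case szB == 8, that is 15 candidate start addresses. */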

   /* Now see if we can find a suitable matching event for
      any of the addresses in toCheck[0 .. nToCheck-1]. */
   for (j = 0; j < nToCheck; j++) {

      cand_a = toCheck[j];
      // VG_(printf)("test %ld %p\n", j, cand_a);

      b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
      if (!b)
         continue;

      ref = (OldRef*)valW;
      tl_assert(keyW == cand_a);
      tl_assert(ref->magic == OldRef_MAGIC);
      tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */

      cand_thrid      = 0; /* invalid; see comments in event_map_bind */
      cand_rcec       = NULL;
      cand_isW        = False;
      cand_szB        = 0;
      cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */

      for (i = 0; i < N_OLDREF_ACCS; i++) {
         Thr_n_RCEC* cand = &ref->accs[i];
         cand_rcec       = cand->rcec;
         cand_thrid      = cand->thrid;
         cand_isW        = (Bool)cand->isW;
         cand_szB        = 1 << cand->szLg2B;
         cand_locksHeldW = cand->locksHeldW;

         if (cand_thrid == 0)
            /* This slot isn't in use.  Ignore it. */
            continue;

         if (cand_thrid == thrid)
            /* This is an access by the same thread, but we're only
               interested in accesses from other threads.  Ignore. */
            continue;

         if ((!cand_isW) && (!isW))
            /* We don't want to report a read racing against another
               read; that's stupid.  So in this case move on. */
            continue;

         if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
            /* No overlap with the access we're asking about.  Ignore. */
            continue;

         /* We have a match.  Stop searching. */
         break;
      }

      tl_assert(i >= 0 && i <= N_OLDREF_ACCS);

      if (i < N_OLDREF_ACCS) {
         Int n, maxNFrames;
         /* return with success */
         tl_assert(cand_thrid);
         tl_assert(cand_rcec);
         tl_assert(cand_rcec->magic == RCEC_MAGIC);
         tl_assert(cand_szB >= 1);
         /* Count how many non-zero frames we have. */
         maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
         for (n = 0; n < maxNFrames; n++) {
            if (0 == cand_rcec->frames[n]) break;
         }
         *resEC      = VG_(make_ExeContext_from_StackTrace)
                          (cand_rcec->frames, n);
         *resThr     = Thr__from_ThrID(cand_thrid);
         *resSzB     = cand_szB;
         *resIsW     = cand_isW;
         *locksHeldW = cand_locksHeldW;
         return True;
      }

      /* consider next address in toCheck[] */
   } /* for (j = 0; j < nToCheck; j++) */

   /* really didn't find anything. */
   return False;
}

static void event_map_init ( void )
{
   Word i;

   /* Context (RCEC) pool allocator */
   rcec_pool_allocator = VG_(newPA) (
                            sizeof(RCEC),
                            1000 /* RCECs per pool */,
                            HG_(zalloc),
                            "libhb.event_map_init.1 (RCEC pools)",
                            HG_(free)
                         );

   /* Context table */
   tl_assert(!contextTab);
   contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
                             N_RCEC_TAB * sizeof(RCEC*) );
   for (i = 0; i < N_RCEC_TAB; i++)
      contextTab[i] = NULL;

   /* Oldref pool allocator */
   oldref_pool_allocator = VG_(newPA)(
                              sizeof(OldRef),
                              1000 /* OldRefs per pool */,
                              HG_(zalloc),
                              "libhb.event_map_init.3 (OldRef pools)",
                              HG_(free)
                           );

   /* Oldref tree */
   tl_assert(!oldrefTree);
   oldrefTree = VG_(newSWA)(
                   HG_(zalloc),
                   "libhb.event_map_init.4 (oldref tree)",
                   HG_(free)
                );

   oldrefGen = 0;
   oldrefGenIncAt = 0;
   oldrefTreeN = 0;
}

static void event_map__check_reference_counts ( Bool before )
{
   RCEC*   rcec;
   OldRef* oldref;
   Word    i;
   UWord   nEnts = 0;
   UWord   keyW, valW;

   /* Set the 'check' reference counts to zero.  Also, optionally
      check that the real reference counts are non-zero.  We allow
      these to fall to zero before a GC, but the GC must get rid of
      all those that are zero, hence none should be zero after a
      GC. */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         nEnts++;
         tl_assert(rcec);
         tl_assert(rcec->magic == RCEC_MAGIC);
         if (!before)
            tl_assert(rcec->rc > 0);
         rcec->rcX = 0;
      }
   }

   /* check that the stats are sane */
   tl_assert(nEnts == stats__ctxt_tab_curr);
   tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);

   /* visit all the referencing points, inc check ref counts */
   VG_(initIterSWA)( oldrefTree );
   while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
      oldref = (OldRef*)valW;
      tl_assert(oldref->magic == OldRef_MAGIC);
      for (i = 0; i < N_OLDREF_ACCS; i++) {
         ThrID aThrID = oldref->accs[i].thrid;
         RCEC* aRef   = oldref->accs[i].rcec;
         if (aThrID != 0) {
            tl_assert(aRef);
            tl_assert(aRef->magic == RCEC_MAGIC);
            aRef->rcX++;
         } else {
            tl_assert(!aRef);
         }
      }
   }

   /* compare check ref counts with actual */
   for (i = 0; i < N_RCEC_TAB; i++) {
      for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
         tl_assert(rcec->rc == rcec->rcX);
      }
   }
}

__attribute__((noinline))
static void event_map_maybe_GC ( void )
{
   OldRef* oldref;
   UWord   keyW, valW, retained, maxGen;
   XArray* refs2del;
   Word    i, j, n2del;

   UWord* genMap      = NULL;
   UWord  genMap_min  = 0;
   UWord  genMap_size = 0;

   if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
      return;

   if (0)
      VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);

   /* Check for sane command line params.  Limit values must match
      those in hg_process_cmd_line_option. */
   tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
   tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );

   /* Check our counting is sane (expensive) */
   if (CHECK_CEM)
      tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));

   /* Check the reference counts (expensive) */
   if (CHECK_CEM)
      event_map__check_reference_counts( True/*before*/ );

   /* Compute the distribution of generation values in the ref tree.
      There are likely only to be a few different generation numbers
      in the whole tree, but we don't know what they are.  Hence use a
      dynamically resized array of counters.  The array is genMap[0
      .. genMap_size-1], where genMap[0] is the count for the
      generation number genMap_min, genMap[1] is the count for
      genMap_min+1, etc.  If a new number is seen outside the range
      [genMap_min .. genMap_min + genMap_size - 1] then the array is
      copied into a larger array, and genMap_min and genMap_size are
      adjusted accordingly. */

   /* genMap :: generation-number -> count-of-nodes-with-that-number */

   VG_(initIterSWA)( oldrefTree );
   while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {

      UWord ea, key;
      oldref = (OldRef*)valW;
      key = oldref->gen;

      /* BEGIN find 'ea', which is the index in genMap holding the
         count for generation number 'key'. */
      if (UNLIKELY(genMap == NULL)) {
         /* deal with the first key to be seen, so that the following
            cases don't need to handle the complexity of a NULL count
            array. */
         genMap_min  = key;
         genMap_size = 1;
         genMap = HG_(zalloc)( "libhb.emmG.1a",
                               genMap_size * sizeof(UWord) );
         ea = 0;
         if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
      }
      else
      if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
         /* this is the expected (almost-always-happens) case: 'key'
            is already mapped in the array. */
         ea = key - genMap_min;
      }
      else
      if (key < genMap_min) {
         /* 'key' appears before the start of the current array.
            Extend the current array by allocating a larger one and
            copying the current one to the upper end of it. */
         Word   more;
         UWord* map2;
         more = genMap_min - key;
         tl_assert(more > 0);
         map2 = HG_(zalloc)( "libhb.emmG.1b",
                             (genMap_size + more) * sizeof(UWord) );
         VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
         HG_(free)( genMap );
         genMap = map2;
         genMap_size += more;
         genMap_min -= more;
         ea = 0;
         tl_assert(genMap_min == key);
         if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
                            key, genMap_min, genMap_min+genMap_size-1 );
      }
      else {
         /* 'key' appears after the end of the current array.  Extend
            the current array by allocating a larger one and copying
            the current one to the lower end of it. */
         Word more;
4563 Word more;
4564 UWord* map2;
4565 tl_assert(key >= genMap_min + genMap_size);
4566 more = key - (genMap_min + genMap_size) + 1;
4567 tl_assert(more > 0);
4568 map2 = HG_(zalloc)( "libhb.emmG.1c",
4569 (genMap_size + more) * sizeof(UWord) );
4570 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4571 HG_(free)( genMap );
4572 genMap = map2;
4573 genMap_size += more;
4574 ea = genMap_size - 1;;
4575 tl_assert(genMap_min + genMap_size - 1 == key);
4576 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
4577 key, genMap_min, genMap_min+genMap_size- 1 );
4578 }
4579 /* END find 'ea' from 'key' */
4580
4581 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00004582 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00004583 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00004584 }
4585
sewardj8fd92d32008-11-20 23:17:01 +00004586 tl_assert(genMap);
4587 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00004588
sewardj8fd92d32008-11-20 23:17:01 +00004589 /* Sanity check what we just computed */
4590 { UWord sum = 0;
4591 for (i = 0; i < genMap_size; i++) {
4592 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
4593 i + genMap_min, genMap[i] );
4594 sum += genMap[i];
4595 }
4596 tl_assert(sum == oldrefTreeN);
4597 }
4598
4599 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00004600 retained = oldrefTreeN;
4601 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00004602
4603 for (i = 0; i < genMap_size; i++) {
4604 keyW = i + genMap_min;
4605 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00004606 tl_assert(keyW > 0); /* can't allow a generation # 0 */
4607 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
4608 tl_assert(keyW >= maxGen);
4609 tl_assert(retained >= valW);
4610 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00004611 > (UWord)(HG_(clo_conflict_cache_size)
4612 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00004613 retained -= valW;
4614 maxGen = keyW;
4615 } else {
4616 break;
4617 }
4618 }
sewardjf98e1c02008-10-25 16:22:41 +00004619
sewardj8fd92d32008-11-20 23:17:01 +00004620 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00004621
sewardj9b1f0fd2008-11-18 23:40:00 +00004622 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00004623
4624 /* Now make up a big list of the oldrefTree entries we want to
4625 delete. We can't simultaneously traverse the tree and delete
4626 stuff from it, so first we need to copy them off somewhere
4627 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00004628 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00004629 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00004630
sewardj9b1f0fd2008-11-18 23:40:00 +00004631 if (retained < oldrefTreeN) {
4632
4633 /* This is the normal (expected) case. We discard any ref whose
4634 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00004635 VG_(initIterSWA)( oldrefTree );
4636 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004637 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004638 tl_assert(oldref->magic == OldRef_MAGIC);
4639 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00004640 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004641 }
sewardjf98e1c02008-10-25 16:22:41 +00004642 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004643 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004644 VG_(message)(Vg_DebugMsg,
4645 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00004646 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004647 maxGen, retained );
4648 }
4649
4650 } else {
4651
4652 static UInt rand_seed = 0; /* leave as static */
4653
4654 /* Degenerate case: there's only one generation in the entire
4655 tree, so we need to have some other way of deciding which
4656 refs to throw away. Just throw out half of them randomly. */
4657 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00004658 VG_(initIterSWA)( oldrefTree );
4659 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004660 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00004661 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004662 tl_assert(oldref->magic == OldRef_MAGIC);
4663 n = VG_(random)( &rand_seed );
4664 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00004665 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004666 retained--;
4667 }
4668 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004669 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004670 VG_(message)(Vg_DebugMsg,
4671 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00004672 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004673 retained );
4674 }
4675
sewardjf98e1c02008-10-25 16:22:41 +00004676 }
4677
4678 n2del = VG_(sizeXA)( refs2del );
4679 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4680
4681 if (0) VG_(printf)("%s","deleting entries\n");
4682 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00004683 Bool b;
4684 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00004685 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00004686 tl_assert(b);
4687 tl_assert(keyW == ga2del);
4688 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004689 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004690 ThrID aThrID = oldref->accs[j].thrid;
4691 RCEC* aRef = oldref->accs[j].rcec;
sewardjc5ea9962008-12-07 01:41:46 +00004692 if (aRef) {
sewardjffce8152011-06-24 10:09:41 +00004693 tl_assert(aThrID != 0);
sewardjf98e1c02008-10-25 16:22:41 +00004694 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004695 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004696 } else {
sewardjffce8152011-06-24 10:09:41 +00004697 tl_assert(aThrID == 0);
sewardjf98e1c02008-10-25 16:22:41 +00004698 }
4699 }
sewardjd86e3a22008-12-03 11:39:37 +00004700
4701 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004702 }
4703
4704 VG_(deleteXA)( refs2del );
4705
sewardjc5ea9962008-12-07 01:41:46 +00004706 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004707
4708 oldrefTreeN = retained;
4709 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4710
4711 /* Throw away all RCECs with zero reference counts */
4712 for (i = 0; i < N_RCEC_TAB; i++) {
4713 RCEC** pp = &contextTab[i];
4714 RCEC* p = *pp;
4715 while (p) {
4716 if (p->rc == 0) {
4717 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004718 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004719 p = *pp;
4720 tl_assert(stats__ctxt_tab_curr > 0);
4721 stats__ctxt_tab_curr--;
4722 } else {
4723 pp = &p->next;
4724 p = p->next;
4725 }
4726 }
4727 }
4728
sewardj8f5374e2008-12-07 11:40:17 +00004729 /* Check the reference counts (expensive) */
4730 if (CHECK_CEM)
4731 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004732
4733 //if (0)
4734 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4735 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4736
4737}
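
/* Worked example of the generation-discard loop above, with made-up
   numbers (the real EVENT_MAP_GC_DISCARD_FRACTION is defined earlier
   in this file): suppose HG_(clo_conflict_cache_size) is 1,000,000,
   the fraction is 0.5 (so at most 500,000 entries may be shed via
   this loop), and generations 1..4 hold 400k, 300k, 200k and 100k
   OldRefs.  Starting from retained == 1,000,000: dropping gen 1
   leaves 600,000, still above 500,000, so gen 1 goes and maxGen
   becomes 1; dropping gen 2 as well would leave 300,000, which is
   not above the limit, so the loop stops.  Generations <= 1 are then
   deleted, retaining 600,000 entries. */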

/////////////////////////////////////////////////////////
//                                                     //
//                    Core MSM                         //
//                                                     //
/////////////////////////////////////////////////////////

/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   Nov 08, and again after [...],
   June 09. */

static ULong stats__msmcread         = 0;
static ULong stats__msmcread_change  = 0;
static ULong stats__msmcwrite        = 0;
static ULong stats__msmcwrite_change = 0;

/* Some notes on the H1 history mechanism:

   Transition rules are:

   read_{Kr,Kw}(Cr,Cw)  = (Cr,            Cr `join` Kw)
   write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw,  Cr `join` Kw)

   After any access by a thread T to a location L, L's constraint pair
   (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.

   After a race by thread T conflicting with some previous access by
   some other thread U, for a location with constraint (before
   processing the later access) (Cr,Cw), Cw[U] is the segment in
   which the previous access lies.

   Hence in record_race_info, we pass in Cfailed and Kfailed, which
   are compared so as to find out which thread(s) this access
   conflicts with.  Once that is established, we also require the
   pre-update Cw for the location, so we can index into it for those
   threads, to get the scalar clock values for the point at which the
   former accesses were made.  (In fact we only bother to do any of
   this for an arbitrarily chosen one of the conflicting threads, as
   that's simpler; it avoids flooding the user with vast amounts of
   mostly useless information, and the program is wrong anyway if it
   contains any races at all -- so we don't really need to show all
   conflicting access pairs initially, so long as we only show none if
   none exist.)

   ---

   That requires the auxiliary proof that

      (Cr `join` Kw)[T] == Kw[T]

   Why should that be true?  Because for any thread T, Kw[T] >= the
   scalar clock value for T known by any other thread.  In other
   words, because T's value for its own scalar clock is at least as up
   to date as the value for it known by any other thread (that is true
   for both the R- and W- scalar clocks).  Hence no other thread will
   be able to feed in a value for that element (indirectly via a
   constraint) which will exceed Kw[T], and hence the join cannot
   cause that particular element to advance. */

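/* To make the rules above concrete, here is an illustrative sketch,
   guarded out of compilation: the two transitions written for a
   fixed two-thread universe, with a made-up 'VC2' standing in for
   real vector clocks.  Nothing below is part of the libhb API.  For
   instance, with Cr = Cw = [3,5] and a writer whose Kw = [7,2],
   rule_write leaves Cr == Cw == [7,5], and indeed Cw[T1] == Kw[T1]
   afterwards, as claimed above. */
#if 0
typedef struct { ULong t1, t2; } VC2;         /* hypothetical 2-entry clock */
static inline VC2 vc2_join ( VC2 a, VC2 b ) { /* pointwise max */
   VC2 r;
   r.t1 = a.t1 >= b.t1 ? a.t1 : b.t1;
   r.t2 = a.t2 >= b.t2 ? a.t2 : b.t2;
   return r;
}
/* read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw) */
static inline void rule_read ( VC2* Cr, VC2* Cw, VC2 Kw ) {
   *Cw = vc2_join(*Cr, Kw);
}
/* write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw) */
static inline void rule_write ( VC2* Cr, VC2* Cw, VC2 Kw ) {
   *Cr = vc2_join(*Cr, Kw);
   *Cw = *Cr;
}
#endif
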
__attribute__((noinline))
static void record_race_info ( Thr* acc_thr,
                               Addr acc_addr, SizeT szB, Bool isWrite,
                               VtsID Cfailed,
                               VtsID Kfailed,
                               VtsID Cw )
{
   /* Call here to report a race.  We just hand it onwards to
      HG_(record_error_Race).  If that in turn discovers that the
      error is going to be collected, then, at history_level 2, that
      queries the conflicting-event map.  The alternative would be to
      query it right here.  But that causes a lot of pointless queries
      for errors which will shortly be discarded as duplicates, and
      can become a performance overhead; so we defer the query until
      we know the error is not a duplicate. */

   /* Stacks for the bounds of the (or one of the) conflicting
      segment(s).  These are only set at history_level 1. */
   ExeContext* hist1_seg_start = NULL;
   ExeContext* hist1_seg_end   = NULL;
   Thread*     hist1_conf_thr  = NULL;

   tl_assert(acc_thr);
   tl_assert(acc_thr->hgthread);
   tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);

   if (HG_(clo_history_level) == 1) {
      Bool found;
      Word firstIx, lastIx;
      ULong_n_EC key;

      /* At history_level 1, we must round up the relevant stack-pair
         for the conflicting segment right now.  This is because
         deferring it is complex; we can't (easily) put Kfailed and
         Cfailed into the XError and wait for later without
         getting tied up in difficulties with VtsID reference
         counting.  So just do it now. */
      Thr*  confThr;
      ULong confTym = 0;
      /* Which thread are we in conflict with?  There may be more than
         one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
         (in fact it's the one with the lowest Thr* value). */
      confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
      /* This must exist!  since if it was NULL then there's no
         conflict (semantics of return value of
         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
         called us, just checked exactly this -- that there was in
         fact a race. */
      tl_assert(confThr);

      /* Get the scalar clock value that the conflicting thread
         introduced into the constraint.  A careful examination of the
         base machine rules shows that this must be the same as the
         conflicting thread's scalar clock when it created this
         constraint.  Hence we know the scalar clock of the
         conflicting thread when the conflicting access was made. */
      confTym = VtsID__indexAt( Cfailed, confThr );

      /* Using this scalar clock, index into the conflicting thread's
         collection of stack traces made each time its vector clock
         (hence its scalar clock) changed.  This gives the stack
         traces at the start and end of the conflicting segment (well,
         as per comment just above, of one of the conflicting
         segments, if there are more than one). */
      key.ull = confTym;
      key.ec  = NULL;
      /* tl_assert(confThr); -- asserted just above */
      tl_assert(confThr->local_Kws_n_stacks);
      firstIx = lastIx = 0;
      found = VG_(lookupXA_UNSAFE)(
                 confThr->local_Kws_n_stacks,
                 &key, &firstIx, &lastIx,
                 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
              );
      if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
                         "confTym %llu found %d (%lu,%lu)\n",
                         Cfailed, Kfailed, Cw,
                         confThr, confTym, found, firstIx, lastIx);
      /* We can't indefinitely collect stack traces at VTS
         transitions, since we'd eventually run out of memory.  Hence
         note_local_Kw_n_stack_for will eventually throw away old
         ones, which in turn means we might fail to find index value
         confTym in the array. */
      if (found) {
         ULong_n_EC *pair_start, *pair_end;
         pair_start
            = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
         hist1_seg_start = pair_start->ec;
         if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
            pair_end
               = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
                                            lastIx+1 );
            /* from properties of VG_(lookupXA) and the comparison fn used: */
            tl_assert(pair_start->ull < pair_end->ull);
            hist1_seg_end = pair_end->ec;
            /* Could do a bit better here.  It may be that pair_end
               doesn't have a stack, but the following entries in the
               array have the same scalar Kw and do have a stack.  So
               we should search a bit further along the array than
               lastIx+1 if hist1_seg_end is NULL. */
         } else {
            if (!confThr->llexit_done)
               hist1_seg_end = main_get_EC( confThr );
         }
         // seg_start could be NULL iff this is the first stack in the thread
         //if (seg_start) VG_(pp_ExeContext)(seg_start);
         //if (seg_end)   VG_(pp_ExeContext)(seg_end);
         hist1_conf_thr = confThr->hgthread;
      }
   }

   HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
                           szB, isWrite,
                           hist1_conf_thr, hist1_seg_start, hist1_seg_end );
}

static Bool is_sane_SVal_C ( SVal sv ) {
   Bool leq;
   if (!SVal__isC(sv)) return True;
   leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   return leq;
}

/* Compute new state following a read */
static inline SVal msmcread ( SVal svOld,
                              /* The following are only needed for
                                 creating error reports. */
                              Thr* acc_thr,
                              Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcread++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviR  = acc_thr->viR;
      VtsID tviW  = acc_thr->viW;
      VtsID rmini = SVal__unC_Rmin(svOld);
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
      if (LIKELY(leq)) {
         /* no race */
         /* Note: RWLOCK subtlety: use tviW, not tviR */
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         goto out;
      } else {
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
                           rmini, /* Cfailed */
                           tviR,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* reading no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
         stats__msmcread_change++;
      }
   }
   return svNew;
}

/* Compute new state following a write */
static inline SVal msmcwrite ( SVal svOld,
                               /* The following are only needed for
                                  creating error reports. */
                               Thr* acc_thr,
                               Addr acc_addr, SizeT szB )
{
   SVal svNew = SVal_INVALID;
   stats__msmcwrite++;

   /* Redundant sanity check on the constraints */
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svOld));
   }

   if (LIKELY(SVal__isC(svOld))) {
      VtsID tviW  = acc_thr->viW;
      VtsID wmini = SVal__unC_Wmin(svOld);
      Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
      if (LIKELY(leq)) {
         /* no race */
         svNew = SVal__mkC( tviW, tviW );
         goto out;
      } else {
         VtsID rmini = SVal__unC_Rmin(svOld);
         /* assert on sanity of constraints. */
         Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
         tl_assert(leqxx);
         // same as in non-race case
         // proof: in the non-race case, we have
         //    rmini <= wmini (invar on constraints)
         //    tviW <= tviR (invar on thread clocks)
         //    wmini <= tviW (from run-time check)
         // hence from transitivity of <= we have
         //    rmini <= wmini <= tviW
         // and so join(rmini,tviW) == tviW
         // and    join(wmini,tviW) == tviW
         // qed.
         svNew = SVal__mkC( VtsID__join2(rmini, tviW),
                            VtsID__join2(wmini, tviW) );
         record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
                           wmini, /* Cfailed */
                           tviW,  /* Kfailed */
                           wmini  /* Cw */ );
         goto out;
      }
   }
   if (SVal__isA(svOld)) {
      /* writing no-access memory (sigh); leave unchanged */
      /* check for no pollution */
      tl_assert(svOld == SVal_NOACCESS);
      svNew = SVal_NOACCESS;
      goto out;
   }
   if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   tl_assert(0);

  out:
   if (CHECK_MSM) {
      tl_assert(is_sane_SVal_C(svNew));
   }
   if (UNLIKELY(svNew != svOld)) {
      tl_assert(svNew != SVal_INVALID);
      if (HG_(clo_history_level) >= 2
          && SVal__isC(svOld) && SVal__isC(svNew)) {
         event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
         stats__msmcwrite_change++;
      }
   }
   return svNew;
}

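/* Illustration of the race check in msmcwrite, with made-up clocks:
   let the location's write constraint be wmini = [T1:2, T2:6] and the
   writing thread T1 have tviW = [T1:5, T2:1].  VtsID__cmpLEQ(wmini,
   tviW) fails in the T2 component (6 > 1), so a race is reported;
   VtsID__findFirst_notLEQ in record_race_info then picks out T2 as
   the conflicting thread, and Cfailed[T2] == 6 gives T2's scalar
   clock at the prior conflicting access.  msmcread performs the
   analogous check of rmini against tviR. */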

/////////////////////////////////////////////////////////
//                                                     //
//   Apply core MSM to specific memory locations       //
//                                                     //
/////////////////////////////////////////////////////////

/*------------- ZSM accesses: 8 bit sapply ------------- */

static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,1 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*------------- ZSM accesses: 16 bit sapply ------------- */

static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcread( thr, a + 0 );
   zsm_sapply08__msmcread( thr, a + 1 );
}

static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         goto slowcase;
      } else {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,2 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_16to8splits++;
   zsm_sapply08__msmcwrite( thr, a + 0 );
   zsm_sapply08__msmcwrite( thr, a + 1 );
}

/*------------- ZSM accesses: 32 bit sapply ------------- */

static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcread( thr, a + 0 );
   zsm_sapply16__msmcread( thr, a + 2 );
}

static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   SVal       svOld, svNew;
   UShort     descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr,a,8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descr[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord      cloff, tno;
   //UWord    toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   //toff  = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord      cloff, tno, toff;
   UShort     descr;
   stats__cline_sread08s++;
   cl    = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno   = get_treeno(a);
   toff  = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}

/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      line containing the first and last address. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}

/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

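/* Example decomposition by the laddering above (addresses made up):
   for a = 0x1003, len = 21, the calls issued are
      zsm_swrite08( 0x1003 )   -- 0x1003 is odd
      zsm_swrite32( 0x1004 )   -- 4-aligned but not 8-aligned
      zsm_swrite64( 0x1008 )
      zsm_swrite64( 0x1010 )
   covering 1 + 4 + 8 + 8 = 21 bytes in four shadow writes. */
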
5588
sewardj23f12002009-07-24 08:45:08 +00005589/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005590 for larger ranges, try to operate directly on the out-of-cache
5591 representation, rather than dragging lines into the cache,
5592 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005593 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005594
sewardj23f12002009-07-24 08:45:08 +00005595 Note that this doesn't change the filtering arrangements. The
5596 caller of zsm_sset_range needs to attend to that. */
5597
5598static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005599{
5600 tl_assert(svNew != SVal_INVALID);
5601 stats__cache_make_New_arange += (ULong)len;
5602
5603 if (0 && len > 500)
5604 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5605
5606 if (0) {
5607 static UWord n_New_in_cache = 0;
5608 static UWord n_New_not_in_cache = 0;
5609 /* tag is 'a' with the in-line offset masked out,
5610 eg a[31]..a[4] 0000 */
5611 Addr tag = a & ~(N_LINE_ARANGE - 1);
5612 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5613 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5614 n_New_in_cache++;
5615 } else {
5616 n_New_not_in_cache++;
5617 }
5618 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5619 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5620 n_New_in_cache, n_New_not_in_cache );
5621 }
5622
5623 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005624 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005625 } else {
5626 Addr before_start = a;
5627 Addr aligned_start = cacheline_ROUNDUP(a);
5628 Addr after_start = cacheline_ROUNDDN(a + len);
5629 UWord before_len = aligned_start - before_start;
5630 UWord aligned_len = after_start - aligned_start;
5631 UWord after_len = a + len - after_start;
5632 tl_assert(before_start <= aligned_start);
5633 tl_assert(aligned_start <= after_start);
5634 tl_assert(before_len < N_LINE_ARANGE);
5635 tl_assert(after_len < N_LINE_ARANGE);
5636 tl_assert(get_cacheline_offset(aligned_start) == 0);
5637 if (get_cacheline_offset(a) == 0) {
5638 tl_assert(before_len == 0);
5639 tl_assert(a == aligned_start);
5640 }
5641 if (get_cacheline_offset(a+len) == 0) {
5642 tl_assert(after_len == 0);
5643 tl_assert(after_start == a+len);
5644 }
5645 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005646 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005647 }
5648 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005649 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005650 }
5651 stats__cache_make_New_inZrep += (ULong)aligned_len;
5652
5653 while (1) {
5654 Addr tag;
5655 UWord wix;
5656 if (aligned_start >= after_start)
5657 break;
5658 tl_assert(get_cacheline_offset(aligned_start) == 0);
5659 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5660 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5661 if (tag == cache_shmem.tags0[wix]) {
5662 UWord i;
5663 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005664 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005665 } else {
5666 UWord i;
5667 Word zix;
5668 SecMap* sm;
5669 LineZ* lineZ;
5670 /* This line is not in the cache. Do not force it in; instead
5671 modify it in-place. */
5672 /* find the Z line to write in and rcdec it or the
5673 associated F line. */
5674 find_Z_for_writing( &sm, &zix, tag );
5675 tl_assert(sm);
5676 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5677 lineZ = &sm->linesZ[zix];
5678 lineZ->dict[0] = svNew;
5679 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5680 for (i = 0; i < N_LINE_ARANGE/4; i++)
5681 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5682 rcinc_LineZ(lineZ);
5683 }
5684 aligned_start += N_LINE_ARANGE;
5685 aligned_len -= N_LINE_ARANGE;
5686 }
5687 tl_assert(aligned_start == after_start);
5688 tl_assert(aligned_len == 0);
5689 }
5690}
5691
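/* Example of the splitting above, assuming the usual 64-byte line
   arrangement (N_LINE_ARANGE == 64): for a = 0x1010, len = 0x100,
   aligned_start becomes 0x1040 and after_start 0x1100, giving a
   48-byte prefix and a 16-byte suffix, both handled by
   zsm_sset_range_SMALL, while the three whole lines in between
   (0x1040 .. 0x10FF) are written directly: in-cache lines via
   zsm_swrite64, out-of-cache lines by rewriting the Z representation
   in place. */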

/////////////////////////////////////////////////////////
//                                                     //
//   Front-filtering accesses                          //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
#  define STATS__F_SHOW \
      do { \
         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
            VG_(printf)("filters: ac %lu sk %lu\n", \
                        stats__f_ac, stats__f_sk); \
      } while (0)
#else
#  define STATS__F_SHOW /* */
#endif

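/* Design note on the _f_ ('filtered') wrappers below: each consults
   the calling thread's filter before doing any shadow-memory work.
   The filter acts as a small per-thread cache of addresses recently
   checked with an equal-or-stronger access kind; a hit means
   re-running the MSM now could not change the location's state, so
   the whole cache-line lookup and clock comparison is skipped.
   stats__f_ac counts all filtered accesses, stats__f_sk those
   skipped.  (This describes the intent; the precise skip conditions
   live in the Filter__ok_to_skip_* functions defined elsewhere in
   this file.) */
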
void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

5829void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5830 stats__f_ac++;
5831 STATS__F_SHOW;
5832 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5833 stats__f_sk++;
5834 return;
5835 }
5836 zsm_sapply08__msmcread(thr, a);
5837}
5838
5839void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5840 stats__f_ac++;
5841 STATS__F_SHOW;
5842 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5843 stats__f_sk++;
5844 return;
5845 }
5846 zsm_sapply16__msmcread(thr, a);
5847}
5848
5849void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5850 stats__f_ac++;
5851 STATS__F_SHOW;
5852 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5853 stats__f_sk++;
5854 return;
5855 }
5856 zsm_sapply32__msmcread(thr, a);
5857}
5858
5859void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5860 stats__f_ac++;
5861 STATS__F_SHOW;
5862 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5863 stats__f_sk++;
5864 return;
5865 }
5866 zsm_sapply64__msmcread(thr, a);
5867}
5868
5869void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5870{
5871 /* fast track a couple of common cases */
5872 if (len == 4 && aligned32(a)) {
5873 zsm_sapply32_f__msmcread( thr, a );
5874 return;
5875 }
5876 if (len == 8 && aligned64(a)) {
5877 zsm_sapply64_f__msmcread( thr, a );
5878 return;
5879 }
5880
5881 /* be completely general (but as efficient as possible) */
5882 if (len == 0) return;
5883
5884 if (!aligned16(a) && len >= 1) {
5885 zsm_sapply08_f__msmcread( thr, a );
5886 a += 1;
5887 len -= 1;
5888 tl_assert(aligned16(a));
5889 }
5890 if (len == 0) return;
5891
5892 if (!aligned32(a) && len >= 2) {
5893 zsm_sapply16_f__msmcread( thr, a );
5894 a += 2;
5895 len -= 2;
5896 tl_assert(aligned32(a));
5897 }
5898 if (len == 0) return;
5899
5900 if (!aligned64(a) && len >= 4) {
5901 zsm_sapply32_f__msmcread( thr, a );
5902 a += 4;
5903 len -= 4;
5904 tl_assert(aligned64(a));
5905 }
5906 if (len == 0) return;
5907
5908 if (len >= 8) {
5909 tl_assert(aligned64(a));
5910 while (len >= 8) {
5911 zsm_sapply64_f__msmcread( thr, a );
5912 a += 8;
5913 len -= 8;
5914 }
5915 tl_assert(aligned64(a));
5916 }
5917 if (len == 0) return;
5918
5919 if (len >= 4)
5920 tl_assert(aligned32(a));
5921 if (len >= 4) {
5922 zsm_sapply32_f__msmcread( thr, a );
5923 a += 4;
5924 len -= 4;
5925 }
5926 if (len == 0) return;
5927
5928 if (len >= 2)
5929 tl_assert(aligned16(a));
5930 if (len >= 2) {
5931 zsm_sapply16_f__msmcread( thr, a );
5932 a += 2;
5933 len -= 2;
5934 }
5935 if (len == 0) return;
5936
5937 if (len >= 1) {
5938 zsm_sapply08_f__msmcread( thr, a );
5939 //a += 1;
5940 len -= 1;
5941 }
5942 tl_assert(len == 0);
5943}
5944
5945void libhb_Thr_resumes ( Thr* thr )
5946{
5947 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00005948 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00005949 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00005950 Filter__clear(thr->filter, "libhb_Thr_resumes");
5951 /* A kludge, but .. if this thread doesn't have any marker stacks
5952 at all, get one right now. This is easier than figuring out
5953 exactly when at thread startup we can and can't take a stack
5954 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00005955 if (HG_(clo_history_level) == 1) {
5956 tl_assert(thr->local_Kws_n_stacks);
5957 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5958 note_local_Kw_n_stack_for(thr);
5959 }
sewardj23f12002009-07-24 08:45:08 +00005960}
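
/* A note on the Filter__clear in libhb_Thr_resumes above: the filter
   caches accesses that may safely be skipped, an assumption that
   activity in other threads can invalidate.  Hence (presumably) it
   is cleared when this thread next runs after others have, and, as
   elsewhere in this file, whenever this thread's own clocks move
   (create, send, recv). */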
5961
5962
5963/////////////////////////////////////////////////////////
5964// //
sewardjf98e1c02008-10-25 16:22:41 +00005965// Synchronisation objects //
5966// //
5967/////////////////////////////////////////////////////////
5968
sewardjffce8152011-06-24 10:09:41 +00005969/* A doubly-linked list of all the SOs. */
5970SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00005971
sewardjffce8152011-06-24 10:09:41 +00005972static SO* SO__Alloc ( void )
5973{
sewardjf98e1c02008-10-25 16:22:41 +00005974 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
5975 so->viR = VtsID_INVALID;
5976 so->viW = VtsID_INVALID;
5977 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00005978 /* Add to the doubly-linked list. */
5979 if (admin_SO) {
5980 tl_assert(admin_SO->admin_prev == NULL);
5981 admin_SO->admin_prev = so;
5982 so->admin_next = admin_SO;
5983 } else {
5984 so->admin_next = NULL;
5985 }
5986 so->admin_prev = NULL;
5987 admin_SO = so;
5988 /* */
sewardjf98e1c02008-10-25 16:22:41 +00005989 return so;
5990}
sewardjffce8152011-06-24 10:09:41 +00005991
5992static void SO__Dealloc ( SO* so )
5993{
sewardjf98e1c02008-10-25 16:22:41 +00005994 tl_assert(so);
5995 tl_assert(so->magic == SO_MAGIC);
5996 if (so->viR == VtsID_INVALID) {
5997 tl_assert(so->viW == VtsID_INVALID);
5998 } else {
5999 tl_assert(so->viW != VtsID_INVALID);
6000 VtsID__rcdec(so->viR);
6001 VtsID__rcdec(so->viW);
6002 }
6003 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006004 /* Delete from the doubly-linked list. */
6005 if (so->admin_prev)
6006 so->admin_prev->admin_next = so->admin_next;
6007 if (so->admin_next)
6008 so->admin_next->admin_prev = so->admin_prev;
6009 if (so == admin_SO)
6010 admin_SO = so->admin_next;
6011 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006012 HG_(free)( so );
6013}
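
/* A minimal sketch (disabled, and not part of the library proper)
   of how the admin_SO chain maintained above can be walked, e.g. to
   count or sanity-check all live SOs.  The function name is made up
   for illustration; it relies only on the admin_prev/admin_next
   links and the magic field exactly as maintained by SO__Alloc and
   SO__Dealloc. */
#if 0
static void SO__sanity_check_all ( void )
{
   UWord n  = 0;
   SO*   so = admin_SO;
   tl_assert(!so || so->admin_prev == NULL); /* list head has no prev */
   for (; so; so = so->admin_next) {
      tl_assert(so->magic == SO_MAGIC);
      n++;
   }
   VG_(printf)("libhb: %lu SOs on the admin list\n", n);
}
#endif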
6014
6015
6016/////////////////////////////////////////////////////////
6017// //
6018// Top Level API //
6019// //
6020/////////////////////////////////////////////////////////
6021
florian6bd9dc12012-11-23 16:17:43 +00006022static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006023{
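   /* Debug-only printer: the unconditional early return below keeps
      it silent.  Change the 1 to 0 to get per-operation dumps of the
      thread's vector clocks. */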
6024 if (1) return;
6025 if (t->viR == t->viW) {
6026 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6027 VtsID__pp( t->viR );
6028 VG_(printf)("%s","\n");
6029 } else {
6030 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6031 VtsID__pp( t->viR );
6032 VG_(printf)(" viW %u==", t->viW);
6033 VtsID__pp( t->viW );
6034 VG_(printf)("%s","\n");
6035 }
6036}
6037
6038
6039Thr* libhb_init (
6040 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006041 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006042 )
6043{
6044 Thr* thr;
6045 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006046
6047 // We will have to store a large number of these,
6048 // so make sure they're the size we expect them to be.
6049 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006050
6051 /* because the first 1024 ThrIDs are unusable */
6052 tl_assert(SCALARTS_N_THRBITS >= 11);
6053 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6054 Thr_n_RCEC). */
6055 tl_assert(SCALARTS_N_THRBITS <= 29);
6056
6057 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6058 (32-bit). It's not correctness-critical, but there are a lot of
6059 them, so it's important from a space viewpoint. Unfortunately
6060 we simply can't pack it into 2 words on a 32-bit target. */
6061 if (sizeof(UWord) == 8) {
6062 tl_assert(sizeof(Thr_n_RCEC) == 16);
6063 } else {
6064 tl_assert(sizeof(Thr_n_RCEC) == 12);
6065 }
6066
6067 /* Word sets really are 32 bits. Even on a 64 bit target. */
6068 tl_assert(sizeof(WordSetID) == 4);
6069 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006070
sewardjf98e1c02008-10-25 16:22:41 +00006071 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006072 tl_assert(get_EC);
6073 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006074 main_get_EC = get_EC;
6075
6076 // No need to initialise hg_wordfm.
6077 // No need to initialise hg_wordset.
6078
sewardj7aa38a92011-02-27 23:04:12 +00006079 /* Allocated once and never deallocated. Used as a temporary in
6080 VTS singleton, tick and join operations. */
6081 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6082 temp_max_sized_VTS->id = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00006083 verydead_thread_table_init();
sewardjf98e1c02008-10-25 16:22:41 +00006084 vts_set_init();
6085 vts_tab_init();
6086 event_map_init();
6087 VtsID__invalidate_caches();
6088
6089 // initialise shadow memory
6090 zsm_init( SVal__rcinc, SVal__rcdec );
6091
6092 thr = Thr__new();
6093 vi = VtsID__mk_Singleton( thr, 1 );
6094 thr->viR = vi;
6095 thr->viW = vi;
6096 VtsID__rcinc(thr->viR);
6097 VtsID__rcinc(thr->viW);
6098
6099 show_thread_state(" root", thr);
6100 return thr;
6101}
6102
sewardj23f12002009-07-24 08:45:08 +00006103
sewardjf98e1c02008-10-25 16:22:41 +00006104Thr* libhb_create ( Thr* parent )
6105{
6106 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6107 the child's index. Since the child's index is guaranteed
6108 unique, it has never been seen before, so the implicit value
6109 before the tick is zero and after that is one. */
6110 Thr* child = Thr__new();
6111
6112 child->viR = VtsID__tick( parent->viR, child );
6113 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006114 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006115 VtsID__rcinc(child->viR);
6116 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006117 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006118 early for that - it may not have a valid TId yet. So, let
6119 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006120
6121 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6122 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6123
6124 /* and the parent has to move along too */
6125 VtsID__rcdec(parent->viR);
6126 VtsID__rcdec(parent->viW);
6127 parent->viR = VtsID__tick( parent->viR, parent );
6128 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006129 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006130 VtsID__rcinc(parent->viR);
6131 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006132 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006133
6134 show_thread_state(" child", child);
6135 show_thread_state("parent", parent);
6136
6137 return child;
6138}
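
/* An illustrative example of the clock movements in libhb_create,
   writing VTSs as {thread:count} maps: if the parent enters with
   viR = viW = {P:5}, then afterwards
      child->viR  = child->viW  = {P:5, C:1}
      parent->viR = parent->viW = {P:6}
   which is why the indexAt assertions above demand exactly 1 for the
   child's own component. */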
6139
6140/* Shut down the library, and print stats (in fact that's _all_
6141 this is for). */
6142void libhb_shutdown ( Bool show_stats )
6143{
6144 if (show_stats) {
6145 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6146 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6147 stats__secmaps_allocd,
6148 stats__secmap_ga_space_covered);
6149 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6150 stats__secmap_linesZ_allocd,
6151 stats__secmap_linesZ_bytes);
6152 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6153 stats__secmap_linesF_allocd,
6154 stats__secmap_linesF_bytes);
6155 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
6156 stats__secmap_iterator_steppings);
6157 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6158 stats__secmaps_search, stats__secmaps_search_slow);
6159
6160 VG_(printf)("%s","\n");
6161 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6162 stats__cache_totrefs, stats__cache_totmisses );
6163 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6164 stats__cache_Z_fetches, stats__cache_F_fetches );
6165 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6166 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6167 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
6168 stats__cache_invals, stats__cache_flushes );
6169 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6170 stats__cache_make_New_arange,
6171 stats__cache_make_New_inZrep);
6172
6173 VG_(printf)("%s","\n");
6174 VG_(printf)(" cline: %'10lu normalises\n",
6175 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006176 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6177 stats__cline_cread64s,
6178 stats__cline_cread32s,
6179 stats__cline_cread16s,
6180 stats__cline_cread08s );
6181 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6182 stats__cline_cwrite64s,
6183 stats__cline_cwrite32s,
6184 stats__cline_cwrite16s,
6185 stats__cline_cwrite08s );
6186 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6187 stats__cline_swrite64s,
6188 stats__cline_swrite32s,
6189 stats__cline_swrite16s,
6190 stats__cline_swrite08s );
6191 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6192 stats__cline_sread08s, stats__cline_scopy08s );
sewardjf98e1c02008-10-25 16:22:41 +00006193 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6194 stats__cline_64to32splits,
6195 stats__cline_32to16splits,
6196 stats__cline_16to8splits );
6197 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6198 stats__cline_64to32pulldown,
6199 stats__cline_32to16pulldown,
6200 stats__cline_16to8pulldown );
6201 if (0)
6202 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
6203 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
6204
6205 VG_(printf)("%s","\n");
6206
sewardjc8028ad2010-05-05 09:34:42 +00006207 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006208 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006209 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006210 stats__msmcwrite, stats__msmcwrite_change);
6211 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6212 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006213 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6214 stats__join2_queries, stats__join2_misses);
6215
6216 VG_(printf)("%s","\n");
sewardjc8028ad2010-05-05 09:34:42 +00006217 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6218 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6219 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6220 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
sewardj7aa38a92011-02-27 23:04:12 +00006221 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
6222 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006223 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6224 stats__vts__indexat_slow );
6225
6226 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006227 VG_(printf)(
6228 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6229 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6230 );
6231 VG_(printf)( " libhb: %lu entries in vts_set\n",
6232 VG_(sizeFM)( vts_set ) );
6233
6234 VG_(printf)("%s","\n");
6235 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6236 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6237 stats__ctxt_rcdec2,
6238 stats__ctxt_rcdec3 );
6239 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6240 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
6241 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
6242 (UWord)N_RCEC_TAB,
6243 stats__ctxt_tab_curr );
6244 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6245 stats__ctxt_tab_qs,
6246 stats__ctxt_tab_cmps );
6247#if 0
6248 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6249 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6250 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6251 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6252 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6253 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6254 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6255 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6256 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6257 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6258 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6259 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6260 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6261 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6262
6263 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6264 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6265 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6266 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6267#endif
6268
6269 VG_(printf)("%s","<<< END libhb stats >>>\n");
6270 VG_(printf)("%s","\n");
6271
6272 }
6273}
6274
sewardjffce8152011-06-24 10:09:41 +00006275/* Receive notification that a thread has low-level exited. The
6276 significance here is that we do not expect to see any more memory
6277 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006278void libhb_async_exit ( Thr* thr )
6279{
sewardj23f12002009-07-24 08:45:08 +00006280 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006281 tl_assert(!thr->llexit_done);
6282 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006283
6284 /* free up Filter and local_Kws_n_stacks (well, actually not the
6285 latter ..) */
6286 tl_assert(thr->filter);
6287 HG_(free)(thr->filter);
6288 thr->filter = NULL;
6289
sewardjffce8152011-06-24 10:09:41 +00006290 /* Tell the VTS mechanism this thread has exited, so it can
6291 participate in VTS pruning. Note this can only happen if the
6292 thread has both ll_exited and has been joined with. */
6293 if (thr->joinedwith_done)
6294 VTS__declare_thread_very_dead(thr);
6295
sewardj2d2ea2f2009-08-02 10:15:07 +00006296 /* Another space-accuracy tradeoff. Do we want to be able to show
6297 H1 history for conflicts in threads which have since exited? If
6298 yes, then we better not free up thr->local_Kws_n_stacks. The
6299 downside is a potential per-thread leak of up to
6300 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6301 XArray average overcommit factor is (1.5 I'd guess). */
6302 // hence:
6303 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6304 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006305}
6306
sewardjffce8152011-06-24 10:09:41 +00006307/* Receive notification that a thread has been joined with. The
6308 significance here is that we do not expect to see any further
6309 references to its vector clocks (Thr::viR and Thr::viW). */
6310void libhb_joinedwith_done ( Thr* thr )
6311{
6312 tl_assert(thr);
6313 /* Caller must ensure that this is only ever called once per Thr. */
6314 tl_assert(!thr->joinedwith_done);
6315 thr->joinedwith_done = True;
6316 if (thr->llexit_done)
6317 VTS__declare_thread_very_dead(thr);
6318}
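
/* Taken together, libhb_async_exit and libhb_joinedwith_done ensure
   that VTS__declare_thread_very_dead runs exactly once per thread,
   on whichever of the two notifications arrives second: each sets
   its own flag and makes the call only if the other flag is already
   set. */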
6319
6320
sewardjf98e1c02008-10-25 16:22:41 +00006321/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6322 a Seg that points at a VTS is its one-and-only owner, and ditto for
6323 a SO that points at a VTS. */
6324
6325SO* libhb_so_alloc ( void )
6326{
6327 return SO__Alloc();
6328}
6329
6330void libhb_so_dealloc ( SO* so )
6331{
6332 tl_assert(so);
6333 tl_assert(so->magic == SO_MAGIC);
6334 SO__Dealloc(so);
6335}
6336
6337/* See comments in libhb.h for details on the meaning of
6338 strong vs weak sends and strong vs weak receives. */
6339void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6340{
6341 /* Copy the VTSs from 'thr' into the sync object, and then move
6342 the thread along one step. */
6343
6344 tl_assert(so);
6345 tl_assert(so->magic == SO_MAGIC);
6346
6347 /* stay sane .. a thread's read-clock must always lead or be the
6348 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006349 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6350 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006351 }
6352
6353 /* since we're overwriting the VtsIDs in the SO, we need to drop
6354 any references made by the previous contents thereof */
6355 if (so->viR == VtsID_INVALID) {
6356 tl_assert(so->viW == VtsID_INVALID);
6357 so->viR = thr->viR;
6358 so->viW = thr->viW;
6359 VtsID__rcinc(so->viR);
6360 VtsID__rcinc(so->viW);
6361 } else {
6362 /* In a strong send, we dump any previous VC in the SO and
6363 install the sending thread's VC instead. For a weak send we
6364 must join2 with what's already there. */
6365 tl_assert(so->viW != VtsID_INVALID);
6366 VtsID__rcdec(so->viR);
6367 VtsID__rcdec(so->viW);
6368 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6369 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6370 VtsID__rcinc(so->viR);
6371 VtsID__rcinc(so->viW);
6372 }
6373
6374 /* move both parent clocks along */
6375 VtsID__rcdec(thr->viR);
6376 VtsID__rcdec(thr->viW);
6377 thr->viR = VtsID__tick( thr->viR, thr );
6378 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006379 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006380 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006381 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006382 }
sewardjf98e1c02008-10-25 16:22:41 +00006383 VtsID__rcinc(thr->viR);
6384 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006385
sewardjf98e1c02008-10-25 16:22:41 +00006386 if (strong_send)
6387 show_thread_state("s-send", thr);
6388 else
6389 show_thread_state("w-send", thr);
6390}
6391
6392void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6393{
6394 tl_assert(so);
6395 tl_assert(so->magic == SO_MAGIC);
6396
6397 if (so->viR != VtsID_INVALID) {
6398 tl_assert(so->viW != VtsID_INVALID);
6399
6400 /* Weak receive (basically, an R-acquisition of a R-W lock).
6401 This advances the read-clock of the receiver, but not the
6402 write-clock. */
6403 VtsID__rcdec(thr->viR);
6404 thr->viR = VtsID__join2( thr->viR, so->viR );
6405 VtsID__rcinc(thr->viR);
6406
sewardj90eb22e2009-07-28 20:22:18 +00006407 /* At one point (r10589) it seemed safest to tick the clocks for
6408 the receiving thread after the join. But on reflection, I
6409 wonder if that might cause it to 'overtake' constraints,
6410 which could lead to missing races. So, back out that part of
6411 r10589. */
6412 //VtsID__rcdec(thr->viR);
6413 //thr->viR = VtsID__tick( thr->viR, thr );
6414 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006415
sewardjf98e1c02008-10-25 16:22:41 +00006416 /* For a strong receive, we also advance the receiver's write
6417 clock, which means the receive as a whole is essentially
6418 equivalent to a W-acquisition of a R-W lock. */
6419 if (strong_recv) {
6420 VtsID__rcdec(thr->viW);
6421 thr->viW = VtsID__join2( thr->viW, so->viW );
6422 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006423
sewardj90eb22e2009-07-28 20:22:18 +00006424 /* See comment just above, re r10589. */
6425 //VtsID__rcdec(thr->viW);
6426 //thr->viW = VtsID__tick( thr->viW, thr );
6427 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006428 }
6429
sewardjf4845dc2010-05-28 20:09:59 +00006430 if (thr->filter)
6431 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006432 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006433
sewardjf98e1c02008-10-25 16:22:41 +00006434 if (strong_recv)
6435 show_thread_state("s-recv", thr);
6436 else
6437 show_thread_state("w-recv", thr);
6438
6439 } else {
6440 tl_assert(so->viW == VtsID_INVALID);
6441 /* Deal with degenerate case: 'so' has no vts, so there has been
6442 no message posted to it. Just ignore this case. */
6443 show_thread_state("d-recv", thr);
6444 }
6445}
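
/* An illustrative send/recv trace, writing VTSs as {thread:count}
   maps.  Suppose thread A has viR = viW = {A:3, B:1} and strong-sends
   on a fresh SO: the SO takes {A:3, B:1} and A then ticks to
   {A:4, B:1}.  If thread B, with viR = viW = {A:1, B:7}, later
   strong-receives, it joins with the SO's clocks and ends up with
   viR = viW = {A:3, B:7}: everything A did before the send now
   happens-before everything B does after the receive. */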
6446
6447Bool libhb_so_everSent ( SO* so )
6448{
6449 if (so->viR == VtsID_INVALID) {
6450 tl_assert(so->viW == VtsID_INVALID);
6451 return False;
6452 } else {
6453 tl_assert(so->viW != VtsID_INVALID);
6454 return True;
6455 }
6456}
6457
6458#define XXX1 0 // 0x67a106c
6459#define XXX2 0
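/* XXX1/XXX2 are manually-set watch addresses for ad-hoc debugging:
   put a nonzero address in either (as in the commented-out value
   above), flip the '0 &&' guards at the TRACEME call sites below,
   and rebuild; trace() then dumps the shadow value and thread state
   whenever a traced access covers that address. */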
6460
sewardj23f12002009-07-24 08:45:08 +00006461static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006462 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6463 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6464 return False;
6465}
florian0c8a47c2013-10-01 20:10:21 +00006466static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006467{
sewardj23f12002009-07-24 08:45:08 +00006468 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006469 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6470 show_thread_state("", thr);
6471 VG_(printf)("%s","\n");
6472}
6473
sewardj23f12002009-07-24 08:45:08 +00006474void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006475{
6476 SVal sv = SVal__mkC(thr->viW, thr->viW);
6477 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006478 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6479 zsm_sset_range( a, szB, sv );
6480 Filter__clear_range( thr->filter, a, szB );
6481 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006482}
6483
sewardjfd35d492011-03-17 19:39:55 +00006484void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006485{
sewardj23f12002009-07-24 08:45:08 +00006486 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006487}
6488
sewardjfd35d492011-03-17 19:39:55 +00006489void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6490{
6491 /* This really does put the requested range in NoAccess. It's
6492 expensive though. */
6493 SVal sv = SVal_NOACCESS;
6494 tl_assert(is_sane_SVal_C(sv));
6495 zsm_sset_range( a, szB, sv );
6496 Filter__clear_range( thr->filter, a, szB );
6497}
6498
sewardj406bac82010-03-03 23:03:40 +00006499void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6500{
6501 SVal sv = SVal_NOACCESS;
6502 tl_assert(is_sane_SVal_C(sv));
6503 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6504 zsm_sset_range( a, szB, sv );
6505 Filter__clear_range( thr->filter, a, szB );
6506 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6507}
6508
sewardj0b20a152011-03-10 21:34:21 +00006509Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00006510 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00006511 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006512}
6513
sewardj0b20a152011-03-10 21:34:21 +00006514void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00006515 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00006516 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006517}
6518
sewardj23f12002009-07-24 08:45:08 +00006519void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00006520{
sewardj23f12002009-07-24 08:45:08 +00006521 zsm_scopy_range(src, dst, len);
6522 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00006523}
6524
6525void libhb_maybe_GC ( void )
6526{
6527 event_map_maybe_GC();
6528 /* If there are still freelist entries available, no need for a
6529 GC. */
6530 if (vts_tab_freelist != VtsID_INVALID)
6531 return;
6532 /* So all the table entries are full, and we're having to expand
6533 the table. But did we hit the threshold point yet? */
6534 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6535 return;
6536 vts_tab__do_GC( False/*don't show stats*/ );
6537}
6538
6539
6540/////////////////////////////////////////////////////////////////
6541/////////////////////////////////////////////////////////////////
6542// //
6543// SECTION END main library //
6544// //
6545/////////////////////////////////////////////////////////////////
6546/////////////////////////////////////////////////////////////////
6547
6548/*--------------------------------------------------------------------*/
6549/*--- end libhb_main.c ---*/
6550/*--------------------------------------------------------------------*/