
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2012 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Debugging #defines                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make these checks selectable at run time.
   Hence set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: VtsID                                           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF




/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SVal                                            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: ScalarTS                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
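
/* Illustrative sketch (not part of the original file): how a
   (thread, timestamp) pair packs into a single 64-bit ScalarTS, per
   the 18/46 split described above.  The helper name is
   hypothetical. */
#if 0
static ScalarTS example_pack_ScalarTS ( ThrID thrid, ULong tym )
{
   ScalarTS ts;
   /* thrids are issued from 1024 upwards (NB2) and must fit in
      SCALARTS_N_THRBITS; timestamps must fit in SCALARTS_N_TYMBITS. */
   tl_assert(thrid >= 1024 && thrid <= ThrID_MAX_VALID);
   tl_assert(tym < (1ULL << SCALARTS_N_TYMBITS));
   ts.thrid = thrid;
   ts.tym   = tym;
   return ts; /* occupies 8 bytes, vs 16 for a (Thr*, ULong) pair */
}
#endif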



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Filter                                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB  (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK    (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
                            & (Addr)(FI_NUM_LINES-1) )


/* In the lines, each 8 bytes are treated individually, and are mapped
   to a UShort.  Regardless of endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W-bits is 0x5555.
*/

/* tags are separated from lines.  tags are Addrs and are
   the base address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;
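
/* Illustrative sketch (not part of the original file): how the R/W
   bit pairs for one byte are read out of a FiLine, following the
   layout described above.  The helper names are hypothetical; the
   real filter code manipulates these bits inline. */
#if 0
static Bool fi_line_seen_R ( FiLine* line, Addr a ) {
   UWord loff = (a & (FI_LINE_SZB - 1)) >> 3; /* which UShort (8 bytes each) */
   UWord boff = a & 7;                        /* which byte within those 8 */
   return 0 != (line->u16s[loff] & (1 << (2*boff + 1))); /* R: high bit of pair */
}
static Bool fi_line_seen_W ( FiLine* line, Addr a ) {
   UWord loff = (a & (FI_LINE_SZB - 1)) >> 3;
   UWord boff = a & 7;
   return 0 != (line->u16s[loff] & (1 << (2*boff)));     /* W: low bit of pair */
}
#endif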



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: Thr, ULong_n_EC                                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this array accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// data decls: SO                                              //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// Forward declarations                                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Forward decls for globals needed by other parts of the library.
   These are set once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc function and data forward decls */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );

/* A doubly-linked list of all the SOs. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN compressed shadow memory                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
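
/* Illustrative sketch (not part of the original file): the intended
   caller-side protocol for the rcinc/rcdec callbacks described
   above.  The names below are hypothetical. */
#if 0
static void my_rcinc ( SVal s ) { /* bump my refcount for s */ }
static void my_rcdec ( SVal s ) { /* drop my refcount for s */ }

static void example_zsm_usage ( void )
{
   zsm_init( my_rcinc, my_rcdec );
   /* ... shadow memory reads/writes happen; counts observed via the
      callbacks may now over- or under-estimate the truth ... */
   zsm_flush_cache();
   /* Only now are the observed counts exact: any SVal whose count is
      zero is genuinely unreferenced by the library, and its backing
      resources may be reclaimed. */
}
#endif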

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)  (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)  ((a) & ~((N)-1))



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS   6 /* must be >= 3 */
#define N_LINE_ARANGE (1 << N_LINE_BITS)
#define N_LINE_TREES  (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

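/* Illustrative sketch (not part of the original file): recovering the
   SVal for byte 'i' of a line held in compressed (Z) form, using the
   dict/ix2s scheme described above.  The helper name is hypothetical;
   cacheline_fetch below does the same job via read_twobit_array. */
#if 0
static SVal example_lineZ_get ( LineZ* lineZ, UWord i ) /* 0 <= i < N_LINE_ARANGE */
{
   /* each ix2s[] byte holds four 2-bit dictionary indexes */
   UWord ix = (lineZ->ix2s[i >> 2] >> (2 * (i & 3))) & 3;
   /* dict[0] == SVal_INVALID would mean the line is in F form instead */
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   return lineZ->dict[ix];
}
#endif
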
/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC 0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


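/* Illustrative sketch (not part of the original file): how an address
   maps onto the direct-mapped cache_shmem just declared.  The helper
   name is hypothetical; get_cacheline below does this for real. */
#if 0
static void example_addr_to_way ( Addr a )
{
   Addr  tag = a & ~(N_LINE_ARANGE - 1);              /* line base address */
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1); /* way index */
   Bool  hit = cache_shmem.tags0[wix] == tag;
   if (0) VG_(printf)("a=%p tag=%p way=%lu %s\n",
                      (void*)a, (void*)tag, wix, hit ? "hit" : "miss");
}
#endif
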
static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


623
624/*----------------------------------------------------------------*/
625/*--- map_shmem :: WordFM Addr SecMap ---*/
626/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
627/*----------------------------------------------------------------*/
628
629/*--------------- SecMap allocation --------------- */
630
631static HChar* shmem__bigchunk_next = NULL;
632static HChar* shmem__bigchunk_end1 = NULL;
633
static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, this rcdecs the current
   representation, in recognition of the fact that the contents are
   just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord  zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
                                                 return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
                                                 return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8   8 8 8 8 */
                                                 return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16    8 8 8 8 */
                                                 return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
                                                 return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
                                                 return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8   8 8 16 */
                                                 return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16    8 8 16 */
                                                 return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
                                                 return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
                                                 return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8   16 8 8 */
                                                 return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16    16 8 8 */
                                                 return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
                                                 return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
                                                 return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8   16 16 */
                                                 return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16    16 16 */
                                                 return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
                                                 return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
                                                 return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
                                                 return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
                                                 return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
                                                 return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
                                                 return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8   32 */
                                                 return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16    32 */
                                                 return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
                                                 return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
                                                 return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
            /* INVALID - any valid descr produces at least one
               valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
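
/* Illustrative sketch (not part of the original file): two worked
   examples of what normalise_tree computes, in terms of the
   TREE_DESCR_* encoding above.  'example_normalise' is hypothetical
   and not compiled. */
#if 0
static void example_normalise ( void )
{
   SVal a = SVal_NOACCESS, b = 0; /* b: any SVal other than SVal_INVALID */
   SVal t[8];
   /* all 8 leaves equal: merges 8 -> 16 -> 32 -> 64 bit granularity */
   t[0]=t[1]=t[2]=t[3]=t[4]=t[5]=t[6]=t[7] = a;
   tl_assert(normalise_tree(t) == TREE_DESCR_64);
   /* low 4 leaves one value, high 4 another: two 32-bit nodes, since
      the 64-bit merge requires tree[4] == tree[0] */
   t[0]=t[1]=t[2]=t[3] = a;
   t[4]=t[5]=t[6]=t[7] = b;
   tl_assert(normalise_tree(t) == (TREE_DESCR_32_1 | TREE_DESCR_32_0));
}
#endif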

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
       CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

1568static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1569 stats__cline_64to32pulldown++;
1570 switch (toff) {
1571 case 0: case 4:
1572 tl_assert(descr & TREE_DESCR_64);
1573 tree[4] = tree[0];
1574 descr &= ~TREE_DESCR_64;
1575 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1576 break;
1577 default:
1578 tl_assert(0);
1579 }
1580 return descr;
1581}
1582
1583static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1584 stats__cline_32to16pulldown++;
1585 switch (toff) {
1586 case 0: case 2:
1587 if (!(descr & TREE_DESCR_32_0)) {
1588 descr = pulldown_to_32(tree, 0, descr);
1589 }
1590 tl_assert(descr & TREE_DESCR_32_0);
1591 tree[2] = tree[0];
1592 descr &= ~TREE_DESCR_32_0;
1593 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1594 break;
1595 case 4: case 6:
1596 if (!(descr & TREE_DESCR_32_1)) {
1597 descr = pulldown_to_32(tree, 4, descr);
1598 }
1599 tl_assert(descr & TREE_DESCR_32_1);
1600 tree[6] = tree[4];
1601 descr &= ~TREE_DESCR_32_1;
1602 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1603 break;
1604 default:
1605 tl_assert(0);
1606 }
1607 return descr;
1608}
1609
1610static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1611 stats__cline_16to8pulldown++;
1612 switch (toff) {
1613 case 0: case 1:
1614 if (!(descr & TREE_DESCR_16_0)) {
1615 descr = pulldown_to_16(tree, 0, descr);
1616 }
1617 tl_assert(descr & TREE_DESCR_16_0);
1618 tree[1] = tree[0];
1619 descr &= ~TREE_DESCR_16_0;
1620 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1621 break;
1622 case 2: case 3:
1623 if (!(descr & TREE_DESCR_16_1)) {
1624 descr = pulldown_to_16(tree, 2, descr);
1625 }
1626 tl_assert(descr & TREE_DESCR_16_1);
1627 tree[3] = tree[2];
1628 descr &= ~TREE_DESCR_16_1;
1629 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1630 break;
1631 case 4: case 5:
1632 if (!(descr & TREE_DESCR_16_2)) {
1633 descr = pulldown_to_16(tree, 4, descr);
1634 }
1635 tl_assert(descr & TREE_DESCR_16_2);
1636 tree[5] = tree[4];
1637 descr &= ~TREE_DESCR_16_2;
1638 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1639 break;
1640 case 6: case 7:
1641 if (!(descr & TREE_DESCR_16_3)) {
1642 descr = pulldown_to_16(tree, 6, descr);
1643 }
1644 tl_assert(descr & TREE_DESCR_16_3);
1645 tree[7] = tree[6];
1646 descr &= ~TREE_DESCR_16_3;
1647 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1648 break;
1649 default:
1650 tl_assert(0);
1651 }
1652 return descr;
1653}
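
/* Illustrative trace (a sketch; assumes only the TREE_DESCR_* bit
   names defined earlier): starting from a tree holding one 64-bit
   value, i.e. descr == TREE_DESCR_64, a 1-byte access at tree offset
   5 proceeds as pulldown_to_8(tree, 5, ..)
     -> pulldown_to_16(tree, 4, ..) -> pulldown_to_32(tree, 4, ..).
   pulldown_to_32 copies tree[0] to tree[4] and rewrites descr to
   32_1|32_0; pulldown_to_16 then copies tree[4] to tree[6], swapping
   32_1 for 16_3|16_2; finally pulldown_to_8 copies tree[4] to
   tree[5], swapping 16_2 for 8_5|8_4.  The resulting descr is
   32_0|16_3|8_5|8_4: bytes 0-3 still live in tree[0], byte 4 in
   tree[4], byte 5 in tree[5], and bytes 6-7 in tree[6]. */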


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}
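
/* The inverse direction, sketched: with descr == 8_1|8_0|16_1 (bytes
   0 and 1 held individually, bytes 2-3 as one 16-bit leaf),
   pullup_descr_to_32(descr, 0) first folds 8_1|8_0 into 16_0 via
   pullup_descr_to_16, then folds 16_1|16_0 into 32_0, yielding
   descr == 32_0 (plus whatever bits describe the other half of the
   tree).  Note that, unlike the pulldowns, the pullups rewrite only
   the descriptor; no tree[] values need to be moved. */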

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END compressed shadow memory                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN vts primitives                                //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   being compact stand-ins for Thr*'s.  Use these functions to map
   between them. */
static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */

__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
   if (due_to_nThrs) {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many threads.\n"
         "Sorry.  Helgrind can only handle programs that create\n"
         "%'llu or fewer threads over their entire lifetime.\n"
         "\n";
      VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   } else {
      const HChar* s =
         "\n"
         "Helgrind: cannot continue, run aborted: too many\n"
         "synchronisation events.  Sorry.  Helgrind can only handle\n"
         "programs which perform %'llu or fewer\n"
         "inter-thread synchronisation events (locks, unlocks, etc).\n"
         "\n";
      VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   }
   VG_(exit)(1);
   /*NOTREACHED*/
   tl_assert(0); /*wtf?!*/
}


/* The dead thread (ThrID, actually) table.  A thread may only be
   listed here if we have been notified thereof by libhb_async_exit.
   New entries are added at the end.  The order isn't important, but
   the ThrID values must be unique.  This table lists the identity of
   all threads that have ever died -- none are ever removed.  We keep
   this table so as to be able to prune entries from VTSs.  We don't
   actually need to keep the set of threads that have ever died --
   only the threads that have died since the previous round of
   pruning.  But it's useful for sanity check purposes to keep the
   entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table = NULL;

/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   ThrID id1 = *(const ThrID*)v1;
   ThrID id2 = *(const ThrID*)v2;
   if (id1 < id2) return -1;
   if (id1 > id2) return 1;
   return 0;
}

static void verydead_thread_table_init ( void )
{
   tl_assert(!verydead_thread_table);
   verydead_thread_table
      = VG_(newXA)( HG_(zalloc),
                    "libhb.verydead_thread_table_init.1",
                    HG_(free), sizeof(ThrID) );
   tl_assert(verydead_thread_table);
   VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
}


/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID    id;
      UInt     usedTS;
      UInt     sizeTS;
      ScalarTS ts[0];
   }
   VTS;

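/* For intuition: a VTS is a sorted vector of (ThrID, timestamp)
   pairs, with threads not mentioned being implicitly at timestamp
   zero.  E.g. (illustrative ThrID values only) the vector clock
   { T1024:3, T1026:7 } would be stored as usedTS == 2,
   ts[0] == (1024,3), ts[1] == (1026,7); any other thread's component
   is implicitly 0.  ts[] is a flexible array member, so a VTS lives
   in a single allocation of sizeof(VTS) + sizeTS * sizeof(ScalarTS)
   bytes, plus one trailing guard word -- see VTS__new below. */
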
/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );

/* Make a clone of 'vts', sizing the new array to exactly match the
   number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );

/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed.
   The new array is sized exactly to hold the number of required
   elements.  'thridsToDel' is an array of ThrIDs to be omitted in the
   clone, and must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS in 'out'.  Caller must have
   pre-allocated 'out' sufficiently big to hold the result in all
   possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );

/* Create in 'out' a VTS which is the same as 'vts' except with
   vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );

/* Create in 'out' a VTS which is the join (max) of 'a' and
   'b'.  Caller must have pre-allocated 'out' sufficiently big to hold
   the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID indicates
   the discovered point for which they are not.  There may be more
   than one such point, but we only care about seeing one of them, not
   all of them.  This rather strange convention is used because
   sometimes we want to know the actual index at which they first
   differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

/* Notify the VTS machinery that a thread has been declared
   comprehensively dead: that is, it has done an async exit AND it has
   been joined with.  This should ensure that its local clocks (.viR
   and .viW) will never again change, and so all mentions of this
   thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );

/*--------------- to do with Vector Timestamps ---------------*/

static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   if (vts->usedTS > vts->sizeTS) return False;
   n = vts->usedTS;
   if (n == 1) {
      st1 = &vts->ts[0];
      if (st1->tym == 0)
         return False;
   }
   else
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = &vts->ts[i];
         st2 = &vts->ts[i+1];
         if (st1->thrid >= st2->thrid)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
   VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   tl_assert(vts->usedTS == 0);
   vts->sizeTS = sizeTS;
   *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   return vts;
}

/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
   tl_assert(vts);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   VTS* clone = VTS__new(who, nTS);
   clone->id = vts->id;
   clone->sizeTS = nTS;
   clone->usedTS = nTS;
   UInt i;
   for (i = 0; i < nTS; i++) {
      clone->ts[i] = vts->ts[i];
   }
   tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   return clone;
}


/* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   must be in strictly increasing order.  We could obviously do this
   much more efficiently (in linear time) if necessary.
*/
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
   UInt i, j;
   tl_assert(vts);
   tl_assert(thridsToDel);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   UInt nTS = vts->usedTS;
   /* Figure out how many ScalarTSs will remain in the output. */
   UInt nReq = nTS;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         nReq--;
   }
   tl_assert(nReq <= nTS);
   /* Copy the ones that will remain. */
   VTS* res = VTS__new(who, nReq);
   j = 0;
   for (i = 0; i < nTS; i++) {
      ThrID thrid = vts->ts[i].thrid;
      if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
         continue;
      res->ts[j++] = vts->ts[i];
   }
   tl_assert(j == nReq);
   tl_assert(j == res->sizeTS);
   res->usedTS = j;
   tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   return res;
}


/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->usedTS <= vts->sizeTS);
   tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
   tl_assert(thr);
   tl_assert(tym >= 1);
   tl_assert(out);
   tl_assert(out->usedTS == 0);
   tl_assert(out->sizeTS >= 1);
   UInt hi = out->usedTS++;
   out->ts[hi].thrid = Thr__to_ThrID(thr);
   out->ts[hi].tym   = tym;
}


/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
   UInt  i, n;
   ThrID me_thrid;
   Bool  found = False;

   stats__vts__tick++;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   if (vts->usedTS >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= 1 + vts->usedTS);

   tl_assert(me);
   me_thrid = Thr__to_ThrID(me);
   tl_assert(is_sane_VTS(vts));
   n = vts->usedTS;

   /* Copy all entries which precede 'me'. */
   for (i = 0; i < n; i++) {
      ScalarTS* here = &vts->ts[i];
      if (UNLIKELY(here->thrid >= me_thrid))
         break;
      UInt hi = out->usedTS++;
      out->ts[hi] = *here;
   }

   /* 'i' now indicates the next entry to copy, if any.
      There are 3 possibilities:
      (a) there is no next entry (we used them all up already):
          add (me_thrid,1) to the output, and quit
      (b) there is a next entry, and its thrid > me_thrid:
          add (me_thrid,1) to the output, then copy the remaining entries
      (c) there is a next entry, and its thrid == me_thrid:
          copy it to the output but increment its timestamp value.
          Then copy the remaining entries.  (c) is the common case.
   */
   tl_assert(i >= 0 && i <= n);
   if (i == n) { /* case (a) */
      UInt hi = out->usedTS++;
      out->ts[hi].thrid = me_thrid;
      out->ts[hi].tym   = 1;
   } else {
      /* cases (b) and (c) */
      ScalarTS* here = &vts->ts[i];
      if (me_thrid == here->thrid) { /* case (c) */
         if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
            /* We're hosed.  We have to stop. */
            scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
         }
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = here->thrid;
         out->ts[hi].tym   = here->tym + 1;
         i++;
         found = True;
      } else { /* case (b) */
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = me_thrid;
         out->ts[hi].tym   = 1;
      }
      /* And copy any remaining entries. */
      for (/*keepgoing*/; i < n; i++) {
         ScalarTS* here2 = &vts->ts[i];
         UInt hi = out->usedTS++;
         out->ts[hi] = *here2;
      }
   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   tl_assert(out->usedTS <= out->sizeTS);
}
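
/* Worked example for VTS__tick (illustrative ThrID values only):
   with vts == { (1024,3), (1030,9) } and me_thrid == 1027, the copy
   loop emits (1024,3), case (b) then inserts (1027,1), and the tail
   loop emits (1030,9), giving out == { (1024,3), (1027,1), (1030,9) }
   with found == False.  With me_thrid == 1030 instead, case (c)
   applies and out == { (1024,3), (1030,10) } with found == True. */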


/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified.
*/
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
   UInt  ia, ib, useda, usedb;
   ULong tyma, tymb, tymMax;
   ThrID thrid;
   UInt  ncommon = 0;

   stats__vts__join++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   tl_assert(out);
   tl_assert(out->usedTS == 0);
   /* overly conservative test, but doing better involves comparing
      the two VTSs, which we don't want to do at this point. */
   if (useda + usedb >= ThrID_MAX_VALID)
      scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   tl_assert(out->sizeTS >= useda + usedb);

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
         from a and b in order, where thrid is the next ThrID
         occurring in either a or b, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         thrid = tmpb->thrid;
         tyma  = 0;
         tymb  = tmpb->tym;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         thrid = tmpa->thrid;
         tyma  = tmpa->tym;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            thrid = tmpa->thrid;
            tyma  = tmpa->tym;
            tymb  = 0;
            ia++;
         } else if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            thrid = tmpb->thrid;
            tyma  = 0;
            tymb  = tmpb->tym;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            thrid = tmpa->thrid; /* == tmpb->thrid */
            tyma  = tmpa->tym;
            tymb  = tmpb->tym;
            ia++;
            ib++;
            ncommon++;
         }
      }

      /* having laboriously determined (thr, tyma, tymb), do something
         useful with it. */
      tymMax = tyma > tymb ? tyma : tymb;
      if (tymMax > 0) {
         UInt hi = out->usedTS++;
         out->ts[hi].thrid = thrid;
         out->ts[hi].tym   = tymMax;
      }

   }

   tl_assert(is_sane_VTS(out));
   tl_assert(out->usedTS <= out->sizeTS);
   tl_assert(out->usedTS == useda + usedb - ncommon);
}
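
/* Worked example for VTS__join (illustrative ThrID values only):
   joining a == { (1024,3), (1025,1) } with b == { (1025,4), (1030,2) }
   merges the two sorted vectors: 1024 appears only in a (tymb is
   implicitly 0), 1025 appears in both (ncommon becomes 1 and
   max(1,4) is taken), and 1030 appears only in b.  The result is
   { (1024,3), (1025,4), (1030,2) }, with usedTS == 2 + 2 - 1 == 3. */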


/* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   they are, or the first ThrID for which they are not (no valid ThrID
   has the value zero).  This rather strange convention is used
   because sometimes we want to know the actual index at which they
   first differ. */
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
   Word  ia, ib, useda, usedb;
   ULong tyma, tymb;

   stats__vts__cmpLEQ++;

   tl_assert(a);
   tl_assert(b);
   useda = a->usedTS;
   usedb = b->usedTS;

   ia = ib = 0;

   while (1) {

      /* This logic is to enumerate doubles (tyma, tymb) drawn
         from a and b in order, and tyma/b are the relevant
         scalar timestamps, taking into account implicit zeroes. */
      ThrID thrid;

      tl_assert(ia >= 0 && ia <= useda);
      tl_assert(ib >= 0 && ib <= usedb);

      if (ia == useda && ib == usedb) {
         /* both empty - done */
         break;

      } else if (ia == useda && ib != usedb) {
         /* a empty, use up b */
         ScalarTS* tmpb = &b->ts[ib];
         tyma  = 0;
         tymb  = tmpb->tym;
         thrid = tmpb->thrid;
         ib++;

      } else if (ia != useda && ib == usedb) {
         /* b empty, use up a */
         ScalarTS* tmpa = &a->ts[ia];
         tyma  = tmpa->tym;
         thrid = tmpa->thrid;
         tymb  = 0;
         ia++;

      } else {
         /* both not empty; extract lowest-ThrID'd triple */
         ScalarTS* tmpa = &a->ts[ia];
         ScalarTS* tmpb = &b->ts[ib];
         if (tmpa->thrid < tmpb->thrid) {
            /* a has the lowest unconsidered ThrID */
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = 0;
            ia++;
         }
         else
         if (tmpa->thrid > tmpb->thrid) {
            /* b has the lowest unconsidered ThrID */
            tyma  = 0;
            tymb  = tmpb->tym;
            thrid = tmpb->thrid;
            ib++;
         } else {
            /* they both next mention the same ThrID */
            tl_assert(tmpa->thrid == tmpb->thrid);
            tyma  = tmpa->tym;
            thrid = tmpa->thrid;
            tymb  = tmpb->tym;
            ia++;
            ib++;
         }
      }

      /* having laboriously determined (tyma, tymb), do something
         useful with it. */
      if (tyma > tymb) {
         /* not LEQ at this index.  Quit, since the answer is
            determined already. */
         tl_assert(thrid >= 1024);
         return thrid;
      }
   }

   return 0; /* all points are LEQ => return an invalid ThrID */
}
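
/* For intuition: LEQ(A,B) holds iff A[t] <= B[t] for every thread t,
   with missing entries read as zero.  E.g. (illustrative ThrID values
   only) comparing { (1024,2) } against { (1024,3), (1025,1) } returns
   0, since 2 <= 3 and 0 <= 1; whereas comparing
   { (1024,2), (1025,5) } against { (1024,3), (1025,1) } returns 1025,
   the first ThrID at which the per-thread comparison fails. */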


/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)  This can be
   performance critical so there is some effort expended to make it as
   fast as possible.
*/
Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which comes across
      likely differences relatively quickly. */
   Word     i;
   Word     useda = 0,    usedb = 0;
   ScalarTS *ctsa = NULL, *ctsb = NULL;

   stats__vts__cmp_structural++;

   tl_assert(a);
   tl_assert(b);

   ctsa = &a->ts[0]; useda = a->usedTS;
   ctsb = &b->ts[0]; usedb = b->usedTS;

   if (LIKELY(useda == usedb)) {
      ScalarTS *tmpa = NULL, *tmpb = NULL;
      stats__vts__cmp_structural_slow++;
      /* Same length vectors.  Find the first difference, if any, as
         fast as possible. */
      for (i = 0; i < useda; i++) {
         tmpa = &ctsa[i];
         tmpb = &ctsb[i];
         if (LIKELY(tmpa->tym == tmpb->tym
                    && tmpa->thrid == tmpb->thrid))
            continue;
         else
            break;
      }
      if (UNLIKELY(i == useda)) {
         /* They're identical. */
         return 0;
      } else {
         tl_assert(i >= 0 && i < useda);
         if (tmpa->tym < tmpb->tym) return -1;
         if (tmpa->tym > tmpb->tym) return 1;
         if (tmpa->thrid < tmpb->thrid) return -1;
         if (tmpa->thrid > tmpb->thrid) return 1;
         /* we just established them as non-identical, hence: */
      }
      /*NOTREACHED*/
      tl_assert(0);
   }

   if (useda < usedb) return -1;
   if (useda > usedb) return 1;
   /*NOTREACHED*/
   tl_assert(0);
}


/* Debugging only.  Display the given VTS in the buffer.
*/
void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
{
   ScalarTS* st;
   HChar     unit[64];
   Word      i, n;
   Int       avail = nBuf;
   tl_assert(vts && vts->ts);
   tl_assert(nBuf > 16);
   buf[0] = '[';
   buf[1] = 0;
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      tl_assert(avail >= 40);
      st = &vts->ts[i];
      VG_(memset)(unit, 0, sizeof(unit));
      VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
                   st->thrid, (ULong)st->tym);
      if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
         VG_(strcat)(buf, " ...]");
         buf[nBuf-1] = 0;
         return;
      }
      VG_(strcat)(buf, unit);
      avail -= VG_(strlen)(unit);
   }
   VG_(strcat)(buf, "]");
   buf[nBuf-1] = 0;
}


/* Debugging only.  Return vts[index], so to speak.
*/
ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
   UWord i, n;
   ThrID idx_thrid = Thr__to_ThrID(idx);
   stats__vts__indexat_slow++;
   tl_assert(vts && vts->ts);
   n = vts->usedTS;
   for (i = 0; i < n; i++) {
      ScalarTS* st = &vts->ts[i];
      if (st->thrid == idx_thrid)
         return st->tym;
   }
   return 0;
}


/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
   if (0) VG_(printf)("VTQ: tae %p\n", thr);

   tl_assert(thr->llexit_done);
   tl_assert(thr->joinedwith_done);

   ThrID nyu;
   nyu = Thr__to_ThrID(thr);
   VG_(addToXA)( verydead_thread_table, &nyu );

   /* We can only get here if we're assured that we'll never again
      need to look at this thread's ::viR or ::viW.  Set them to
      VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
      mostly so that we don't wind up pruning them (as that would be
      nonsensical: the only interesting ScalarTS entry for a dead
      thread is its own index, and the pruning will remove that). */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID_INVALID;
   thr->viW = VtsID_INVALID;
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION END vts primitives                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
// SECTION BEGIN main library                                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
//                                                     //
// VTS set                                             //
//                                                     //
/////////////////////////////////////////////////////////

static WordFM* /* WordFM VTS* void */ vts_set = NULL;

static void vts_set_init ( void )
{
   tl_assert(!vts_set);
   vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
                         HG_(free),
                         (Word(*)(UWord,UWord))VTS__cmp_structural );
   tl_assert(vts_set);
}

/* Given a VTS, look in vts_set to see if we already have a
   structurally identical one.  If yes, return the pair (True, pointer
   to the existing one).  If no, clone this one, add the clone to the
   set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
   UWord keyW, valW;
   stats__vts_set__focaa++;
   tl_assert(cand->id == VtsID_INVALID);
   /* lookup cand (by value) */
   if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
      /* found it */
      tl_assert(valW == 0);
      /* if this fails, cand (by ref) was already present (!) */
      tl_assert(keyW != (UWord)cand);
      *res = (VTS*)keyW;
      return True;
   } else {
      /* not present.  Clone, add and return address of clone. */
      stats__vts_set__focaa_a++;
      VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
      tl_assert(clone != cand);
      VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
      *res = clone;
      return False;
   }
}
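
/* Typical use, sketched: the caller builds a candidate VTS in scratch
   space and then interns it via this function.  If the return value
   is True, the scratch copy can be reused immediately, since '*res'
   points at the long-lived copy already in the set; if False, '*res'
   points at a freshly made clone which the set now owns.  Either way,
   ownership of 'cand' itself never passes to the set. */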


/////////////////////////////////////////////////////////
//                                                     //
// VTS table                                           //
//                                                     //
/////////////////////////////////////////////////////////

static void VtsID__invalidate_caches ( void ); /* fwds */

/* A type to hold VTS table entries.  Invariants:
   If .vts == NULL, then this entry is not in use, so:
   - .rc == 0
   - this entry is on the freelist (unfortunately, does not imply
     any constraints on value for .freelink)
   If .vts != NULL, then this entry is in use:
   - .vts is findable in vts_set
   - .vts->id == this entry number
   - no specific value for .rc (even 0 is OK)
   - this entry is not on freelist, so .freelink == VtsID_INVALID
*/
typedef
   struct {
      VTS*  vts;      /* vts, in vts_set */
      UWord rc;       /* reference count - enough for entire aspace */
      VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
      VtsID remap;    /* used only during pruning */
   }
   VtsTE;

/* The VTS table. */
static XArray* /* of VtsTE */ vts_tab = NULL;

/* An index into the VTS table, indicating the start of the list of
   free (available for use) entries.  If the list is empty, this is
   VtsID_INVALID. */
static VtsID vts_tab_freelist = VtsID_INVALID;

/* Do a GC of vts_tab when the freelist becomes empty AND the size of
   vts_tab equals or exceeds this size.  After GC, the value here is
   set appropriately so as to check for the next GC point. */
static Word vts_next_GC_at = 1000;

static void vts_tab_init ( void )
{
   vts_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
                    HG_(free), sizeof(VtsTE) );
   vts_tab_freelist
      = VtsID_INVALID;
   tl_assert(vts_tab);
}

/* Add ii to the free list, checking that it looks out-of-use. */
static void add_to_free_list ( VtsID ii )
{
   VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   tl_assert(ie->freelink == VtsID_INVALID);
   ie->freelink = vts_tab_freelist;
   vts_tab_freelist = ii;
}

/* Get an entry from the free list.  This will return VtsID_INVALID if
   the free list is empty. */
static VtsID get_from_free_list ( void )
{
   VtsID  ii;
   VtsTE* ie;
   if (vts_tab_freelist == VtsID_INVALID)
      return VtsID_INVALID;
   ii = vts_tab_freelist;
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts == NULL);
   tl_assert(ie->rc == 0);
   vts_tab_freelist = ie->freelink;
   return ii;
}

/* Produce a new VtsID that can be used, either by getting it from
   the freelist, or, if that is empty, by expanding vts_tab. */
static VtsID get_new_VtsID ( void )
{
   VtsID ii;
   VtsTE te;
   ii = get_from_free_list();
   if (ii != VtsID_INVALID)
      return ii;
   te.vts      = NULL;
   te.rc       = 0;
   te.freelink = VtsID_INVALID;
   te.remap    = VtsID_INVALID;
   ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   return ii;
}
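
/* Freelist intuition (illustrative indices only): suppose entries 3
   and then 7 are freed, in that order.  Afterwards
   vts_tab_freelist == 7, vts_tab[7].freelink == 3, and
   vts_tab[3].freelink == VtsID_INVALID.  get_new_VtsID() would hand
   back 7 first, then 3, and only then grow vts_tab. */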


/* Indirect callback from lib_zsm. */
static void VtsID__rcinc ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc < ~0UL); /* else we can't continue */
   tl_assert(ie->vts->id == ii);
   ie->rc++;
}

/* Indirect callback from lib_zsm. */
static void VtsID__rcdec ( VtsID ii )
{
   VtsTE* ie;
   /* VG_(indexXA) does a range check for us */
   ie = VG_(indexXA)( vts_tab, ii );
   tl_assert(ie->vts); /* else it's not in use */
   tl_assert(ie->rc > 0); /* else RC snafu */
   tl_assert(ie->vts->id == ii);
   ie->rc--;
}


/* Look up 'cand' in our collection of VTSs.  If present, return the
   VtsID for the pre-existing version.  If not present, clone it, add
   the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   it, and return that. */
static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
{
   VTS* in_tab = NULL;
   tl_assert(cand->id == VtsID_INVALID);
   Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   tl_assert(in_tab);
   if (already_have) {
      /* We already have a copy of 'cand'.  Use that. */
      VtsTE* ie;
      tl_assert(in_tab->id != VtsID_INVALID);
      ie = VG_(indexXA)( vts_tab, in_tab->id );
      tl_assert(ie->vts == in_tab);
      return in_tab->id;
   } else {
      VtsID ii = get_new_VtsID();
      VtsTE* ie = VG_(indexXA)( vts_tab, ii );
      ie->vts = in_tab;
      ie->rc = 0;
      ie->freelink = VtsID_INVALID;
      in_tab->id = ii;
      return ii;
   }
}


static void show_vts_stats ( const HChar* caller )
{
   UWord nSet, nTab, nLive;
   ULong totrc;
   UWord n, i;
   nSet = VG_(sizeFM)( vts_set );
   nTab = VG_(sizeXA)( vts_tab );
   totrc = 0;
   nLive = 0;
   n = VG_(sizeXA)( vts_tab );
   for (i = 0; i < n; i++) {
      VtsTE* ie = VG_(indexXA)( vts_tab, i );
      if (ie->vts) {
         nLive++;
         totrc += (ULong)ie->rc;
      } else {
         tl_assert(ie->rc == 0);
      }
   }
   VG_(printf)("  show_vts_stats %s\n", caller);
   VG_(printf)("    vts_tab size %4lu\n", nTab);
   VG_(printf)("    vts_tab live %4lu\n", nLive);
   VG_(printf)("    vts_set size %4lu\n", nSet);
   VG_(printf)("        total rc %4llu\n", totrc);
}


/* --- Helpers for VtsID pruning --- */

static
void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                   /*MOD*/XArray* /* of VtsTE */ new_tab,
                   VtsID* ii )
{
   VtsTE *old_te, *new_te;
   VtsID old_id, new_id;
   /* We're relying here on VG_(indexXA)'s range checking to assert on
      any stupid values, in particular *ii == VtsID_INVALID. */
   old_id = *ii;
   old_te = VG_(indexXA)( old_tab, old_id );
   old_te->rc--;
   new_id = old_te->remap;
   new_te = VG_(indexXA)( new_tab, new_id );
   new_te->rc++;
   *ii = new_id;
}

static
void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
                            /*MOD*/XArray* /* of VtsTE */ new_tab,
                            SVal* s )
{
   SVal old_sv, new_sv;
   old_sv = *s;
   if (SVal__isC(old_sv)) {
      VtsID rMin, wMin;
      rMin = SVal__unC_Rmin(old_sv);
      wMin = SVal__unC_Wmin(old_sv);
      remap_VtsID( old_tab, new_tab, &rMin );
      remap_VtsID( old_tab, new_tab, &wMin );
      new_sv = SVal__mkC( rMin, wMin );
      *s = new_sv;
   }
}
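
/* E.g. (a sketch): if old entry 5 was assigned .remap == 2 during
   pruning, then remap_VtsID(old, new, &ii) with *ii == 5 decrements
   old_tab[5].rc, increments new_tab[2].rc, and leaves *ii == 2.
   remap_VtsIDs_in_SVal simply applies this to both the read-min and
   write-min VtsIDs packed inside a constraint-carrying ("C") SVal,
   and leaves non-C SVals untouched. */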


/* NOT TO BE CALLED FROM WITHIN libzsm. */
__attribute__((noinline))
static void vts_tab__do_GC ( Bool show_stats )
{
   UWord i, nTab, nLive, nFreed;

   /* ---------- BEGIN VTS GC ---------- */
   /* check this is actually necessary. */
   tl_assert(vts_tab_freelist == VtsID_INVALID);

   /* empty the caches for partial order checks and binary joins.  We
      could do better and prune out the entries to be deleted, but it
      ain't worth the hassle. */
   VtsID__invalidate_caches();

   /* First, make the reference counts up to date. */
   zsm_flush_cache();

   nTab = VG_(sizeXA)( vts_tab );

   if (show_stats) {
      VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
      show_vts_stats("before GC");
   }

   /* Now we can inspect the entire vts_tab.  Any entries with zero
      .rc fields are now no longer in use and can be put back on the
      free list, removed from vts_set, and deleted. */
   nFreed = 0;
   for (i = 0; i < nTab; i++) {
      Bool present;
      UWord oldK = 0, oldV = 12345;
      VtsTE* te = VG_(indexXA)( vts_tab, i );
      if (te->vts == NULL) {
         tl_assert(te->rc == 0);
         continue; /* already on the free list (presumably) */
      }
      if (te->rc > 0)
         continue; /* in use */
      /* Ok, we got one we can free. */
      tl_assert(te->vts->id == i);
      /* first, remove it from vts_set. */
      present = VG_(delFromFM)( vts_set,
                                &oldK, &oldV, (UWord)te->vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(te->vts);
      te->vts = NULL;
      /* and finally put this entry on the free list */
      tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
      add_to_free_list( i );
      nFreed++;
   }

   /* Now figure out when the next GC should be.  We'll allow the
      number of VTSs to double before GCing again.  Except of course
      that since we can't (or, at least, don't) shrink vts_tab, we
      can't set the threshold value smaller than it. */
   tl_assert(nFreed <= nTab);
   nLive = nTab - nFreed;
   tl_assert(nLive >= 0 && nLive <= nTab);
   vts_next_GC_at = 2 * nLive;
   if (vts_next_GC_at < nTab)
      vts_next_GC_at = nTab;
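   /* E.g. if the table held 1000 entries of which 300 survive, the
      next GC fires when the entry count reaches max(600, 1000) ==
      1000, since the table itself never shrinks (numbers purely
      illustrative). */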

   if (show_stats) {
      show_vts_stats("after GC");
      VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   }

   if (VG_(clo_stats)) {
      static UInt ctr = 1;
      tl_assert(nTab > 0);
      VG_(message)(Vg_DebugMsg,
                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   }
   /* ---------- END VTS GC ---------- */

   /* Decide whether to do VTS pruning.  We have one of three
      settings. */
   static UInt pruning_auto_ctr = 0; /* do not make non-static */

   Bool do_pruning = False;
   switch (HG_(clo_vts_pruning)) {
      case 0: /* never */
         break;
      case 1: /* auto */
         do_pruning = (++pruning_auto_ctr % 5) == 0;
         break;
      case 2: /* always */
         do_pruning = True;
         break;
      default:
         tl_assert(0);
   }

   /* The rest of this routine only handles pruning, so we can
      quit at this point if it is not to be done. */
   if (!do_pruning)
      return;

   /* ---------- BEGIN VTS PRUNING ---------- */
   /* We begin by sorting the backing table on its ThrID values, so as
      to (1) check they are unique [else something has gone wrong,
      since it means we must have seen some Thr* exiting more than
      once, which can't happen], and (2) so that we can quickly look
      up the dead-thread entries as we work through the VTSs. */
   VG_(sortXA)( verydead_thread_table );
   /* Sanity check: check for unique ThrID values. */
   UWord nBT = VG_(sizeXA)( verydead_thread_table );
   if (nBT > 0) {
      ThrID thrid1, thrid2;
      thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
      for (i = 1; i < nBT; i++) {
         thrid1 = thrid2;
         thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
         tl_assert(thrid1 < thrid2);
      }
   }
   /* Ok, so the dead thread table has unique and in-order keys. */

   /* We will run through the old table, and create a new table and
      set, at the same time setting the .remap entries in the old
      table to point to the new entries.  Then, visit every VtsID in
      the system, and replace all of them with new ones, using the
      .remap entries in the old table.  Finally, we can delete the old
      table and set. */

   XArray* /* of VtsTE */ new_tab
      = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
                    HG_(free), sizeof(VtsTE) );

   /* WordFM VTS* void */
   WordFM* new_set
      = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
                    HG_(free),
                    (Word(*)(UWord,UWord))VTS__cmp_structural );

   /* Visit each old VTS.  For each one:

      * make a pruned version

      * search new_set for the pruned version, yielding either
        Nothing (not present) or the new VtsID for it.

      * if not present, allocate a new VtsID for it, insert (pruned
        VTS, new VtsID) in the tree, and set
        remap_table[old VtsID] = new VtsID.

      * if present, set remap_table[old VtsID] = new VtsID, where
        new VtsID was determined by the tree lookup.  Then free up
        the clone.
   */

   UWord nBeforePruning = 0, nAfterPruning = 0;
   UWord nSTSsBefore = 0, nSTSsAfter = 0;
   VtsID new_VtsID_ctr = 0;

   for (i = 0; i < nTab; i++) {

      /* For each old VTS .. */
      VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
      VTS*   old_vts = old_te->vts;
      tl_assert(old_te->remap == VtsID_INVALID);

      /* Skip it if not in use */
      if (old_te->rc == 0) {
         tl_assert(old_vts == NULL);
         continue;
      }
      tl_assert(old_vts != NULL);
      tl_assert(old_vts->id == i);
      tl_assert(old_vts->ts != NULL);

      /* It is in use.  Make a pruned version. */
      nBeforePruning++;
      nSTSsBefore += old_vts->usedTS;
      VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
                                   old_vts, verydead_thread_table);
      tl_assert(new_vts->sizeTS == new_vts->usedTS);
      tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
                == 0x0ddC0ffeeBadF00dULL);

      /* Get rid of the old VTS and the tree entry.  It's a bit more
         complex to incrementally delete the VTSs now than to nuke
         them all after we're done, but the upside is that we don't
         wind up temporarily storing potentially two complete copies
         of each VTS and hence spiking memory use. */
      UWord oldK = 0, oldV = 12345;
      Bool  present = VG_(delFromFM)( vts_set,
                                      &oldK, &oldV, (UWord)old_vts );
      tl_assert(present); /* else it isn't in vts_set ?! */
      tl_assert(oldV == 0); /* no info stored in vts_set val fields */
      tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
      /* now free the VTS itself */
      VTS__delete(old_vts);
      old_te->vts = NULL;
      old_vts = NULL;

      /* NO MENTIONS of old_vts allowed beyond this point. */

      /* Ok, we have the pruned copy in new_vts.  See if a
         structurally identical version is already present in new_set.
         If so, delete the one we just made and move on; if not, add
         it. */
      VTS*  identical_version = NULL;
      UWord valW = 12345;
      if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
                        (UWord)new_vts)) {
         // already have it
         tl_assert(valW == 0);
         tl_assert(identical_version != NULL);
         tl_assert(identical_version != new_vts);
         VTS__delete(new_vts);
         new_vts = identical_version;
         tl_assert(new_vts->id != VtsID_INVALID);
      } else {
         tl_assert(valW == 12345);
         tl_assert(identical_version == NULL);
         new_vts->id = new_VtsID_ctr++;
         Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
         tl_assert(!b);
         VtsTE new_te;
         new_te.vts      = new_vts;
         new_te.rc       = 0;
         new_te.freelink = VtsID_INVALID;
         new_te.remap    = VtsID_INVALID;
         Word j = VG_(addToXA)( new_tab, &new_te );
         tl_assert(j <= i);
         tl_assert(j == new_VtsID_ctr - 1);
         // stats
         nAfterPruning++;
         nSTSsAfter += new_vts->usedTS;
      }
      old_te->remap = new_vts->id;

   } /* for (i = 0; i < nTab; i++) */

   /* At this point, we have:
      * the old VTS table, with its .remap entries set,
        and with all .vts == NULL.
      * the old VTS tree should be empty, since it and the old VTSs
        it contained have been incrementally deleted as we worked
        through the old table.
      * the new VTS table, with all .rc == 0, all .freelink and .remap
        == VtsID_INVALID.
      * the new VTS tree.
   */
   tl_assert( VG_(sizeFM)(vts_set) == 0 );

2994 /* Now actually apply the mapping. */
2995 /* Visit all the VtsIDs in the entire system. Where do we expect
2996 to find them?
2997 (a) in shadow memory -- the LineZs and LineFs
2998 (b) in our collection of struct _Thrs.
2999 (c) in our collection of struct _SOs.
3000 Nowhere else, AFAICS. Not in the zsm cache, because that just
3001 got invalidated.
3002
3003 Using the .remap fields in vts_tab, map each old VtsID to a new
3004 VtsID. For each old VtsID, dec its rc; and for each new one,
3005 inc it. This sets up the new refcounts, and it also gives a
3006 cheap sanity check of the old ones: all old refcounts should be
3007 zero after this operation.
3008 */
3009
3010 /* Do the mappings for (a) above: iterate over the Primary shadow
3011 mem map (WordFM Addr SecMap*). */
3012 UWord secmapW = 0;
3013 VG_(initIterFM)( map_shmem );
3014 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3015 UWord j;
3016 SecMap* sm = (SecMap*)secmapW;
3017 tl_assert(sm->magic == SecMap_MAGIC);
3018 /* Deal with the LineZs */
3019 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3020 LineZ* lineZ = &sm->linesZ[i];
3021 if (lineZ->dict[0] == SVal_INVALID)
3022 continue; /* not in use -- data is in F rep instead */
3023 for (j = 0; j < 4; j++)
3024 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3025 }
3026 /* Deal with the LineFs */
3027 for (i = 0; i < sm->linesF_size; i++) {
3028 LineF* lineF = &sm->linesF[i];
3029 if (!lineF->inUse)
3030 continue;
3031 for (j = 0; j < N_LINE_ARANGE; j++)
3032 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3033 }
3034 }
3035 VG_(doneIterFM)( map_shmem );
3036
3037 /* Do the mappings for (b) above: visit our collection of struct
3038 _Thrs. */
3039 Thread* hgthread = get_admin_threads();
3040 tl_assert(hgthread);
3041 while (hgthread) {
3042 Thr* hbthr = hgthread->hbthr;
3043 tl_assert(hbthr);
3044 /* Threads that are listed in the prunable set have their viR
3045 and viW set to VtsID_INVALID, so we can't mess with them. */
3046 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3047 tl_assert(hbthr->viR == VtsID_INVALID);
3048 tl_assert(hbthr->viW == VtsID_INVALID);
3049 hgthread = hgthread->admin;
3050 continue;
3051 }
3052 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3053 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3054 hgthread = hgthread->admin;
3055 }
3056
3057 /* Do the mappings for (c) above: visit the struct _SOs. */
3058 SO* so = admin_SO;
3059 while (so) {
3060 if (so->viR != VtsID_INVALID)
3061 remap_VtsID( vts_tab, new_tab, &so->viR );
3062 if (so->viW != VtsID_INVALID)
3063 remap_VtsID( vts_tab, new_tab, &so->viW );
3064 so = so->admin_next;
3065 }
3066
3067 /* So, we're nearly done (with this incredibly complex operation).
3068 Check that the refcounts of the old VtsIDs all fell to zero, as
3069 expected. Any failure is serious. */
3070 for (i = 0; i < nTab; i++) {
3071 VtsTE* te = VG_(indexXA)( vts_tab, i );
3072 tl_assert(te->vts == NULL);
3073 /* This is the assert proper. Note we're also asserting
3074 zeroness for old entries which are unmapped (hence have
3075 .remap == VtsID_INVALID). That's OK. */
3076 tl_assert(te->rc == 0);
3077 }
3078
3079 /* Install the new table and set. */
3080 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3081 vts_set = new_set;
3082 VG_(deleteXA)( vts_tab );
3083 vts_tab = new_tab;
3084
3085 /* The freelist of vts_tab entries is empty now, because we've
3086 compacted all of the live entries at the low end of the
3087 table. */
3088 vts_tab_freelist = VtsID_INVALID;
3089
3090 /* Sanity check vts_set and vts_tab. */
3091
3092 /* Because all the live entries got slid down to the bottom of vts_tab: */
3093 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3094
3095 /* Assert that the vts_tab and vts_set entries point at each other
3096 in the required way */
3097 UWord wordK = 0, wordV = 0;
3098 VG_(initIterFM)( vts_set );
3099 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3100 tl_assert(wordK != 0);
3101 tl_assert(wordV == 0);
3102 VTS* vts = (VTS*)wordK;
3103 tl_assert(vts->id != VtsID_INVALID);
3104 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3105 tl_assert(te->vts == vts);
3106 }
3107 VG_(doneIterFM)( vts_set );
3108
3109 /* Also iterate over the table, and check each entry is
3110 plausible. */
3111 nTab = VG_(sizeXA)( vts_tab );
3112 for (i = 0; i < nTab; i++) {
3113 VtsTE* te = VG_(indexXA)( vts_tab, i );
3114 tl_assert(te->vts);
3115 tl_assert(te->vts->id == i);
3116 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3117 tl_assert(te->freelink == VtsID_INVALID); /* in use */
3118 tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3119 }
3120
3121 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3122 if (VG_(clo_stats)) {
3123 static UInt ctr = 1;
3124 tl_assert(nTab > 0);
3125 VG_(message)(
3126 Vg_DebugMsg,
3127 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3128 "after %lu (avg sz %lu)\n",
3129 ctr++,
3130 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3131 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3132 );
3133 }
3134 if (0)
3135 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
3136 "after pruning %lu (avg sz %lu)\n",
3137 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3138 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1));
3139 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003140}
3141
3142
3143/////////////////////////////////////////////////////////
3144// //
3145// Vts IDs //
3146// //
3147/////////////////////////////////////////////////////////
3148
3149//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003150/* A temporary, max-sized VTS which is used as a temporary (the first
3151 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3152static VTS* temp_max_sized_VTS = NULL;
3153
3154//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003155static ULong stats__cmpLEQ_queries = 0;
3156static ULong stats__cmpLEQ_misses = 0;
3157static ULong stats__join2_queries = 0;
3158static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003159
3160static inline UInt ROL32 ( UInt w, Int n ) {
3161 w = (w << n) | (w >> (32-n));
3162 return w;
3163}
3164static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3165 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3166 return hash % nTab;
3167}
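/* For example, with nTab == 1023 the pair (vi1,vi2) == (5,9) maps to
   slot (ROL32(5,19) ^ ROL32(9,13)) % 1023. Each (vi1,vi2) pair thus
   has exactly one candidate slot: the caches below are direct-mapped,
   and a colliding pair simply evicts the previous occupant. */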
3168
sewardj23f12002009-07-24 08:45:08 +00003169#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003170static
sewardj23f12002009-07-24 08:45:08 +00003171 struct { VtsID vi1; VtsID vi2; Bool leq; }
3172 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003173
3174#define N_JOIN2_CACHE 1023
3175static
3176 struct { VtsID vi1; VtsID vi2; VtsID res; }
3177 join2_cache[N_JOIN2_CACHE];
3178
3179static void VtsID__invalidate_caches ( void ) {
3180 Int i;
sewardj23f12002009-07-24 08:45:08 +00003181 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3182 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3183 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3184 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003185 }
3186 for (i = 0; i < N_JOIN2_CACHE; i++) {
3187 join2_cache[i].vi1 = VtsID_INVALID;
3188 join2_cache[i].vi2 = VtsID_INVALID;
3189 join2_cache[i].res = VtsID_INVALID;
3190 }
3191}
3192//////////////////////////
3193
sewardjd52392d2008-11-08 20:36:26 +00003194//static Bool VtsID__is_valid ( VtsID vi ) {
3195// VtsTE* ve;
3196// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3197// return False;
3198// ve = VG_(indexXA)( vts_tab, vi );
3199// if (!ve->vts)
3200// return False;
3201// tl_assert(ve->vts->id == vi);
3202// return True;
3203//}
sewardjf98e1c02008-10-25 16:22:41 +00003204
3205static VTS* VtsID__to_VTS ( VtsID vi ) {
3206 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3207 tl_assert(te->vts);
3208 return te->vts;
3209}
3210
3211static void VtsID__pp ( VtsID vi ) {
3212 HChar buf[100];
3213 VTS* vts = VtsID__to_VTS(vi);
3214 VTS__show( buf, sizeof(buf)-1, vts );
3215 buf[sizeof(buf)-1] = 0;
3216 VG_(printf)("%s", buf);
3217}
3218
3219/* compute partial ordering relation of vi1 and vi2. */
3220__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003221static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003222 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003223 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003224 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003225 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003226 tl_assert(vi1 != vi2);
3227 ////++
sewardj23f12002009-07-24 08:45:08 +00003228 stats__cmpLEQ_queries++;
3229 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3230 if (cmpLEQ_cache[hash].vi1 == vi1
3231 && cmpLEQ_cache[hash].vi2 == vi2)
3232 return cmpLEQ_cache[hash].leq;
3233 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003234 ////--
3235 v1 = VtsID__to_VTS(vi1);
3236 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003237 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003238 ////++
sewardj23f12002009-07-24 08:45:08 +00003239 cmpLEQ_cache[hash].vi1 = vi1;
3240 cmpLEQ_cache[hash].vi2 = vi2;
3241 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003242 ////--
sewardj23f12002009-07-24 08:45:08 +00003243 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003244}
sewardj23f12002009-07-24 08:45:08 +00003245static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3246 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003247}
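/* Note the vi1 == vi2 fast path above is sound because VTSs are
   interned: the vts_set machinery (see the pruning code earlier)
   guarantees one VtsID per distinct VTS, so equal IDs imply identical
   vector clocks, which are trivially LEQ. */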
3248
3249/* compute binary join */
3250__attribute__((noinline))
3251static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3252 UInt hash;
3253 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003254 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003255 //if (vi1 == vi2) return vi1;
3256 tl_assert(vi1 != vi2);
3257 ////++
3258 stats__join2_queries++;
3259 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3260 if (join2_cache[hash].vi1 == vi1
3261 && join2_cache[hash].vi2 == vi2)
3262 return join2_cache[hash].res;
3263 stats__join2_misses++;
3264 ////--
3265 vts1 = VtsID__to_VTS(vi1);
3266 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003267 temp_max_sized_VTS->usedTS = 0;
3268 VTS__join(temp_max_sized_VTS, vts1,vts2);
3269 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003270 ////++
3271 join2_cache[hash].vi1 = vi1;
3272 join2_cache[hash].vi2 = vi2;
3273 join2_cache[hash].res = res;
3274 ////--
3275 return res;
3276}
3277static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003278 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003279}
3280
3281/* create a singleton VTS, namely [thr:1] */
3282static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003283 temp_max_sized_VTS->usedTS = 0;
3284 VTS__singleton(temp_max_sized_VTS, thr,tym);
3285 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003286}
3287
3288/* tick operation, creates value 1 if specified index is absent */
3289static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3290 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003291 temp_max_sized_VTS->usedTS = 0;
3292 VTS__tick(temp_max_sized_VTS, idx,vts);
3293 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003294}
3295
3296/* index into a VTS (only for assertions) */
3297static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3298 VTS* vts = VtsID__to_VTS(vi);
3299 return VTS__indexAt_SLOW( vts, idx );
3300}
3301
sewardj23f12002009-07-24 08:45:08 +00003302/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3303 any, really) element in vi1 which is pointwise greater-than the
3304 corresponding element in vi2. Given the precondition, such an
3305 element must exist; the code asserts this. This needs to be fairly
3306 quick since it is called every time a race is detected. */
3307static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3308{
3309 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003310 Thr* diffthr;
3311 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003312 tl_assert(vi1 != vi2);
3313 vts1 = VtsID__to_VTS(vi1);
3314 vts2 = VtsID__to_VTS(vi2);
3315 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003316 diffthrid = VTS__cmpLEQ(vts1, vts2);
3317 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003318 tl_assert(diffthr); /* else they are LEQ ! */
3319 return diffthr;
3320}
3321
3322
3323/////////////////////////////////////////////////////////
3324// //
3325// Filters //
3326// //
3327/////////////////////////////////////////////////////////
3328
sewardj23f12002009-07-24 08:45:08 +00003329/* Forget everything we know -- clear the filter and let everything
3330 through. This needs to be as fast as possible, since it is called
3331 every time the running thread changes, and every time a thread's
3332 vector clocks change, which can be quite frequent. The obvious
3333 fast way to do this is simply to stuff in tags which we know are
3334 not going to match anything, since they're not aligned to the start
3335 of a line. */
florian6bd9dc12012-11-23 16:17:43 +00003336static void Filter__clear ( Filter* fi, const HChar* who )
sewardj23f12002009-07-24 08:45:08 +00003337{
3338 UWord i;
3339 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3340 for (i = 0; i < FI_NUM_LINES; i += 8) {
3341 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3342 fi->tags[i+1] = 1;
3343 fi->tags[i+2] = 1;
3344 fi->tags[i+3] = 1;
3345 fi->tags[i+4] = 1;
3346 fi->tags[i+5] = 1;
3347 fi->tags[i+6] = 1;
3348 fi->tags[i+7] = 1;
3349 }
3350 tl_assert(i == FI_NUM_LINES);
3351}
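/* The '1' trick works because genuine tags come from FI_GET_TAG,
   which (presumably, per its definition earlier in this file) clears
   the low in-line address bits; a tag with its bottom bit set can
   therefore never equal FI_GET_TAG(a) for any a, so every subsequent
   lookup on a cleared line misses. */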
3352
3353/* Clearing an arbitrary range in the filter. Unfortunately
3354 we have to do this due to core-supplied new/die-mem events. */
3355
3356static void Filter__clear_1byte ( Filter* fi, Addr a )
3357{
3358 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3359 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3360 FiLine* line = &fi->lines[lineno];
3361 UWord loff = (a - atag) / 8;
3362 UShort mask = 0x3 << (2 * (a & 7));
3363 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3364 if (LIKELY( fi->tags[lineno] == atag )) {
3365 /* hit. clear the bits. */
3366 UShort u16 = line->u16s[loff];
3367 line->u16s[loff] = u16 & ~mask; /* clear them */
3368 } else {
3369 /* miss. The filter doesn't hold this address, so ignore. */
3370 }
3371}
3372
3373static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3374{
3375 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3376 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3377 FiLine* line = &fi->lines[lineno];
3378 UWord loff = (a - atag) / 8;
3379 if (LIKELY( fi->tags[lineno] == atag )) {
3380 line->u16s[loff] = 0;
3381 } else {
3382 /* miss. The filter doesn't hold this address, so ignore. */
3383 }
3384}
3385
3386static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3387{
3388 //VG_(printf)("%lu ", len);
3389 /* slowly do part preceding 8-alignment */
3390 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3391 Filter__clear_1byte( fi, a );
3392 a++;
3393 len--;
3394 }
3395 /* vector loop */
3396 while (len >= 8) {
3397 Filter__clear_8bytes_aligned( fi, a );
3398 a += 8;
3399 len -= 8;
3400 }
3401 /* slowly do tail */
3402 while (UNLIKELY(len > 0)) {
3403 Filter__clear_1byte( fi, a );
3404 a++;
3405 len--;
3406 }
3407}
3408
3409
3410/* ------ Read handlers for the filter. ------ */
3411
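/* A recap of the encoding these handlers rely on, inferred from the
   masks below: each FiLine covers FI_LINE_SZB bytes as one UShort per
   8-byte chunk; byte k of a chunk owns bit 2k (its W bit) and bit
   2k+1 (its R bit). The read handlers test and set only the R bits
   (0xAAAA-style masks), while the write handlers further down use
   both R and W bits (0xFFFF-style masks), so a recorded write also
   lets later reads of the same bytes be skipped. */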
3412static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3413{
3414 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3415 return False;
3416 {
3417 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3418 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3419 FiLine* line = &fi->lines[lineno];
3420 UWord loff = (a - atag) / 8;
3421 UShort mask = 0xAAAA;
3422 if (LIKELY( fi->tags[lineno] == atag )) {
3423 /* hit. check line and update. */
3424 UShort u16 = line->u16s[loff];
3425 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3426 line->u16s[loff] = u16 | mask; /* set them */
3427 return ok;
3428 } else {
3429 /* miss. nuke existing line and re-use it. */
3430 UWord i;
3431 fi->tags[lineno] = atag;
3432 for (i = 0; i < FI_LINE_SZB / 8; i++)
3433 line->u16s[i] = 0;
3434 line->u16s[loff] = mask;
3435 return False;
3436 }
3437 }
3438}
3439
3440static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3441{
3442 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3443 return False;
3444 {
3445 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3446 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3447 FiLine* line = &fi->lines[lineno];
3448 UWord loff = (a - atag) / 8;
3449 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3450 if (LIKELY( fi->tags[lineno] == atag )) {
3451 /* hit. check line and update. */
3452 UShort u16 = line->u16s[loff];
3453 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3454 line->u16s[loff] = u16 | mask; /* set them */
3455 return ok;
3456 } else {
3457 /* miss. nuke existing line and re-use it. */
3458 UWord i;
3459 fi->tags[lineno] = atag;
3460 for (i = 0; i < FI_LINE_SZB / 8; i++)
3461 line->u16s[i] = 0;
3462 line->u16s[loff] = mask;
3463 return False;
3464 }
3465 }
3466}
3467
3468static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3469{
3470 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3471 return False;
3472 {
3473 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3474 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3475 FiLine* line = &fi->lines[lineno];
3476 UWord loff = (a - atag) / 8;
3477 UShort mask = 0xA << (2 * (a & 6));
3478 /* mask is A000, 0A00, 00A0 or 000A */
3479 if (LIKELY( fi->tags[lineno] == atag )) {
3480 /* hit. check line and update. */
3481 UShort u16 = line->u16s[loff];
3482 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3483 line->u16s[loff] = u16 | mask; /* set them */
3484 return ok;
3485 } else {
3486 /* miss. nuke existing line and re-use it. */
3487 UWord i;
3488 fi->tags[lineno] = atag;
3489 for (i = 0; i < FI_LINE_SZB / 8; i++)
3490 line->u16s[i] = 0;
3491 line->u16s[loff] = mask;
3492 return False;
3493 }
3494 }
3495}
3496
3497static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3498{
3499 {
3500 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3501 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3502 FiLine* line = &fi->lines[lineno];
3503 UWord loff = (a - atag) / 8;
3504 UShort mask = 0x2 << (2 * (a & 7));
3505 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3506 if (LIKELY( fi->tags[lineno] == atag )) {
3507 /* hit. check line and update. */
3508 UShort u16 = line->u16s[loff];
3509 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3510 line->u16s[loff] = u16 | mask; /* set them */
3511 return ok;
3512 } else {
3513 /* miss. nuke existing line and re-use it. */
3514 UWord i;
3515 fi->tags[lineno] = atag;
3516 for (i = 0; i < FI_LINE_SZB / 8; i++)
3517 line->u16s[i] = 0;
3518 line->u16s[loff] = mask;
3519 return False;
3520 }
3521 }
3522}
3523
3524
3525/* ------ Write handlers for the filter. ------ */
3526
3527static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3528{
3529 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3530 return False;
3531 {
3532 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3533 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3534 FiLine* line = &fi->lines[lineno];
3535 UWord loff = (a - atag) / 8;
3536 UShort mask = 0xFFFF;
3537 if (LIKELY( fi->tags[lineno] == atag )) {
3538 /* hit. check line and update. */
3539 UShort u16 = line->u16s[loff];
3540 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3541 line->u16s[loff] = u16 | mask; /* set them */
3542 return ok;
3543 } else {
3544 /* miss. nuke existing line and re-use it. */
3545 UWord i;
3546 fi->tags[lineno] = atag;
3547 for (i = 0; i < FI_LINE_SZB / 8; i++)
3548 line->u16s[i] = 0;
3549 line->u16s[loff] = mask;
3550 return False;
3551 }
3552 }
3553}
3554
3555static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3556{
3557 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3558 return False;
3559 {
3560 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3561 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3562 FiLine* line = &fi->lines[lineno];
3563 UWord loff = (a - atag) / 8;
3564 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3565 if (LIKELY( fi->tags[lineno] == atag )) {
3566 /* hit. check line and update. */
3567 UShort u16 = line->u16s[loff];
3568 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3569 line->u16s[loff] = u16 | mask; /* set them */
3570 return ok;
3571 } else {
3572 /* miss. nuke existing line and re-use it. */
3573 UWord i;
3574 fi->tags[lineno] = atag;
3575 for (i = 0; i < FI_LINE_SZB / 8; i++)
3576 line->u16s[i] = 0;
3577 line->u16s[loff] = mask;
3578 return False;
3579 }
3580 }
3581}
3582
3583static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3584{
3585 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3586 return False;
3587 {
3588 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3589 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3590 FiLine* line = &fi->lines[lineno];
3591 UWord loff = (a - atag) / 8;
3592 UShort mask = 0xF << (2 * (a & 6));
3593 /* mask is F000, 0F00, 00F0 or 000F */
3594 if (LIKELY( fi->tags[lineno] == atag )) {
3595 /* hit. check line and update. */
3596 UShort u16 = line->u16s[loff];
3597 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3598 line->u16s[loff] = u16 | mask; /* set them */
3599 return ok;
3600 } else {
3601 /* miss. nuke existing line and re-use it. */
3602 UWord i;
3603 fi->tags[lineno] = atag;
3604 for (i = 0; i < FI_LINE_SZB / 8; i++)
3605 line->u16s[i] = 0;
3606 line->u16s[loff] = mask;
3607 return False;
3608 }
3609 }
3610}
3611
3612static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3613{
3614 {
3615 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3616 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3617 FiLine* line = &fi->lines[lineno];
3618 UWord loff = (a - atag) / 8;
3619 UShort mask = 0x3 << (2 * (a & 7));
3620 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3621 if (LIKELY( fi->tags[lineno] == atag )) {
3622 /* hit. check line and update. */
3623 UShort u16 = line->u16s[loff];
3624 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3625 line->u16s[loff] = u16 | mask; /* set them */
3626 return ok;
3627 } else {
3628 /* miss. nuke existing line and re-use it. */
3629 UWord i;
3630 fi->tags[lineno] = atag;
3631 for (i = 0; i < FI_LINE_SZB / 8; i++)
3632 line->u16s[i] = 0;
3633 line->u16s[loff] = mask;
3634 return False;
3635 }
3636 }
3637}
3638
sewardjf98e1c02008-10-25 16:22:41 +00003639
3640/////////////////////////////////////////////////////////
3641// //
3642// Threads //
3643// //
3644/////////////////////////////////////////////////////////
3645
sewardje4cce742011-02-24 15:25:24 +00003646/* Maps ThrID values to their Thr*s (which contain ThrID values that
3647 should point back to the relevant slot in the array). The lowest
3648 numbered slot (0) is for thrid = 1024, slot (1) for thrid = 1025, etc. */
3649static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3650
3651/* And a counter to dole out ThrID values. For rationale/background,
3652 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003653static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003654
3655static ThrID Thr__to_ThrID ( Thr* thr ) {
3656 return thr->thrid;
3657}
3658static Thr* Thr__from_ThrID ( UInt thrid ) {
3659 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3660 tl_assert(thr->thrid == thrid);
3661 return thr;
3662}
3663
3664static Thr* Thr__new ( void )
3665{
sewardjf98e1c02008-10-25 16:22:41 +00003666 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3667 thr->viR = VtsID_INVALID;
3668 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003669 thr->llexit_done = False;
3670 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00003671 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
sewardj2d2ea2f2009-08-02 10:15:07 +00003672 /* We only really need this at history level 1, but unfortunately
3673 this routine is called before the command line processing is
3674 done (sigh), so we can't rely on HG_(clo_history_level) at this
3675 point. Hence always allocate it. Bah. */
sewardj8ab2c132009-08-02 09:34:35 +00003676 thr->local_Kws_n_stacks
sewardj2d2ea2f2009-08-02 10:15:07 +00003677 = VG_(newXA)( HG_(zalloc),
3678 "libhb.Thr__new.3 (local_Kws_and_stacks)",
sewardj23f12002009-07-24 08:45:08 +00003679 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00003680
3681 /* Add this Thr* <-> ThrID binding to the mapping, and
3682 cross-check */
3683 if (!thrid_to_thr_map) {
3684 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3685 HG_(free), sizeof(Thr*) );
3686 tl_assert(thrid_to_thr_map);
3687 }
3688
sewardj7aa38a92011-02-27 23:04:12 +00003689 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00003690 /* We're hosed. We have to stop. */
3691 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3692 }
3693
3694 thr->thrid = thrid_counter++;
3695 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3696 tl_assert(ix + 1024 == thr->thrid);
3697
sewardjf98e1c02008-10-25 16:22:41 +00003698 return thr;
3699}
3700
sewardj8ab2c132009-08-02 09:34:35 +00003701static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00003702{
3703 Word nPresent;
3704 ULong_n_EC pair;
3705 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00003706
3707 // We only collect this info at history level 1 (approx)
3708 if (HG_(clo_history_level) != 1)
3709 return;
3710
sewardj8ab2c132009-08-02 09:34:35 +00003711 /* This is the scalar Kw for thr. */
3712 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00003713 pair.ec = main_get_EC( thr );
3714 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00003715 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00003716
3717 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00003718 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00003719
3720 /* Throw away old stacks, if necessary. We can't accumulate stuff
3721 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00003722 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3723 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3724 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3725 if (0)
3726 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00003727 thr, pair.ull, pair.ec );
3728 }
3729
3730 if (nPresent > 0) {
3731 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00003732 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3733 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00003734 }
3735
3736 if (nPresent == 0)
3737 pair.ec = NULL;
3738
sewardj8ab2c132009-08-02 09:34:35 +00003739 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00003740
3741 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00003742 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00003743 thr, pair.ull, pair.ec );
3744 if (0)
3745 VG_(pp_ExeContext)(pair.ec);
3746}
3747
florian6bd9dc12012-11-23 16:17:43 +00003748static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
3749 const ULong_n_EC* pair2 )
sewardj23f12002009-07-24 08:45:08 +00003750{
3751 if (pair1->ull < pair2->ull) return -1;
3752 if (pair1->ull > pair2->ull) return 1;
3753 return 0;
3754}
3755
sewardjf98e1c02008-10-25 16:22:41 +00003756
3757/////////////////////////////////////////////////////////
3758// //
3759// Shadow Values //
3760// //
3761/////////////////////////////////////////////////////////
3762
3763// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3764// hb_zsm.h. We have to do everything else here.
3765
3766/* SVal is 64 bit unsigned int.
3767
3768 <---------30---------> <---------30--------->
3769 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003770 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003771 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
3772
sewardjf98e1c02008-10-25 16:22:41 +00003773*/
3774#define SVAL_TAGMASK (3ULL << 62)
3775
3776static inline Bool SVal__isC ( SVal s ) {
3777 return (0ULL << 62) == (s & SVAL_TAGMASK);
3778}
3779static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3780 //tl_assert(VtsID__is_valid(rmini));
3781 //tl_assert(VtsID__is_valid(wmini));
3782 return (((ULong)rmini) << 32) | ((ULong)wmini);
3783}
3784static inline VtsID SVal__unC_Rmin ( SVal s ) {
3785 tl_assert(SVal__isC(s));
3786 return (VtsID)(s >> 32);
3787}
3788static inline VtsID SVal__unC_Wmin ( SVal s ) {
3789 tl_assert(SVal__isC(s));
3790 return (VtsID)(s & 0xFFFFFFFFULL);
3791}
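/* Example round trip: for rmini, wmini < 2^30 (as the diagram above
   requires), s = SVal__mkC(rmini, wmini) has tag bits 00, so
   SVal__isC(s) holds, SVal__unC_Rmin(s) == rmini and
   SVal__unC_Wmin(s) == wmini. */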
3792
sewardj23f12002009-07-24 08:45:08 +00003793static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003794 return (2ULL << 62) == (s & SVAL_TAGMASK);
3795}
sewardj23f12002009-07-24 08:45:08 +00003796static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003797 return 2ULL << 62;
3798}
3799
3800/* Direct callback from lib_zsm. */
3801static void SVal__rcinc ( SVal s ) {
3802 if (SVal__isC(s)) {
3803 VtsID__rcinc( SVal__unC_Rmin(s) );
3804 VtsID__rcinc( SVal__unC_Wmin(s) );
3805 }
3806}
3807
3808/* Direct callback from lib_zsm. */
3809static void SVal__rcdec ( SVal s ) {
3810 if (SVal__isC(s)) {
3811 VtsID__rcdec( SVal__unC_Rmin(s) );
3812 VtsID__rcdec( SVal__unC_Wmin(s) );
3813 }
3814}
3815
3816
3817/////////////////////////////////////////////////////////
3818// //
3819// Change-event map2 //
3820// //
3821/////////////////////////////////////////////////////////
3822
sewardjf98e1c02008-10-25 16:22:41 +00003823#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3824
3825/* This is in two parts:
3826
sewardj23f12002009-07-24 08:45:08 +00003827 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003828 traces. When the reference count of a stack trace becomes zero,
3829 it is removed from the set and freed up. The intent is to have
3830 a set of stack traces which can be referred to from (2), but to
3831 only represent each one once. The set is indexed/searched by
3832 ordering on the stack trace vectors.
3833
sewardj849b0ed2008-12-21 10:43:10 +00003834 2. A SparseWA of OldRefs. These store information about each old
3835 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003836 location for which the information is recorded. For LRU
3837 purposes, each OldRef also contains a generation number,
3838 indicating when it was most recently accessed.
3839
3840 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003841 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3842 size) triples to RCECs. This allows us to collect the last
3843 access-traceback by up to N_OLDREF_ACCS different triples for
3844 this location. The accs[] array is an MTF (move-to-front) array. If a binding
3845 falls off the end, that's too bad -- we will lose info about
3846 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003847
sewardj849b0ed2008-12-21 10:43:10 +00003848 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003849 whose generation numbers are below some threshold; hence doing
3850 approximate LRU discarding. For each discarded OldRef we must
3851 of course decrement the reference count on the all RCECs it
3852 refers to, in order that entries from (1) eventually get
3853 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003854
3855 A major improvement in reliability of this mechanism would be to
3856 have a dynamically sized OldRef.accs[] array, so no entries ever
3857 fall off the end. Investigations (Dec 08) suggest that a
3858 major cause of the non-availability of conflicting-access traces
3859 in race reports is the fixed size of this array. I
3860 suspect for most OldRefs, only a few entries are used, but for a
3861 minority of cases there is an overflow, leading to info lossage.
3862 Investigations also suggest this is very workload and scheduling
3863 sensitive. Therefore a dynamic sizing would be better.
3864
philippe6643e962012-01-17 21:16:30 +00003865 However, dynamic sizing would defeat the use of a PoolAllocator
sewardj849b0ed2008-12-21 10:43:10 +00003866 for OldRef structures. And that's important for performance. So
3867 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003868*/
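/* In outline, the structure described above is:

      guest address --(SparseWA)--> OldRef --accs[0..4]--> RCEC

   Each recorded access updates exactly one OldRef (moving the
   matching accs[] slot toward the front) and takes a reference on
   exactly one RCEC; an OldRef can hold up to N_OLDREF_ACCS (5) such
   references at once. */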
3869
3870
3871static UWord stats__ctxt_rcdec1 = 0;
3872static UWord stats__ctxt_rcdec2 = 0;
3873static UWord stats__ctxt_rcdec3 = 0;
3874static UWord stats__ctxt_rcdec_calls = 0;
3875static UWord stats__ctxt_rcdec_discards = 0;
3876static UWord stats__ctxt_rcdec1_eq = 0;
3877
3878static UWord stats__ctxt_tab_curr = 0;
3879static UWord stats__ctxt_tab_max = 0;
3880
3881static UWord stats__ctxt_tab_qs = 0;
3882static UWord stats__ctxt_tab_cmps = 0;
3883
3884
3885///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00003886//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00003887///
3888
3889#define N_FRAMES 8
3890
3891// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3892#define RCEC_MAGIC 0xab88abb2UL
3893
3894//#define N_RCEC_TAB 98317 /* prime */
3895#define N_RCEC_TAB 196613 /* prime */
3896
3897typedef
3898 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003899 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003900 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003901 UWord rc;
3902 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003903 UWord frames_hash; /* hash of all the frames */
3904 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003905 }
3906 RCEC;
3907
3908static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3909
3910
3911/* Gives an arbitrary total order on RCEC .frames fields */
3912static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3913 Word i;
3914 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3915 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003916 if (ec1->frames_hash < ec2->frames_hash) return -1;
3917 if (ec1->frames_hash > ec2->frames_hash) return 1;
3918 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003919 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003920 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003921 }
3922 return 0;
3923}
3924
3925
3926/* Dec the ref of this RCEC. */
3927static void ctxt__rcdec ( RCEC* ec )
3928{
3929 stats__ctxt_rcdec_calls++;
3930 tl_assert(ec && ec->magic == RCEC_MAGIC);
3931 tl_assert(ec->rc > 0);
3932 ec->rc--;
3933}
3934
3935static void ctxt__rcinc ( RCEC* ec )
3936{
3937 tl_assert(ec && ec->magic == RCEC_MAGIC);
3938 ec->rc++;
3939}
3940
3941
philippe6643e962012-01-17 21:16:30 +00003942//////////// BEGIN RCEC pool allocator
3943static PoolAlloc* rcec_pool_allocator;
sewardjd86e3a22008-12-03 11:39:37 +00003944
3945static RCEC* alloc_RCEC ( void ) {
philippe6643e962012-01-17 21:16:30 +00003946 return VG_(allocEltPA) ( rcec_pool_allocator );
sewardjd86e3a22008-12-03 11:39:37 +00003947}
3948
3949static void free_RCEC ( RCEC* rcec ) {
3950 tl_assert(rcec->magic == RCEC_MAGIC);
philippe6643e962012-01-17 21:16:30 +00003951 VG_(freeEltPA)( rcec_pool_allocator, rcec );
sewardjd86e3a22008-12-03 11:39:37 +00003952}
philippe6643e962012-01-17 21:16:30 +00003953//////////// END RCEC pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00003954
3955
sewardjf98e1c02008-10-25 16:22:41 +00003956/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3957 move it one step closer to the front of the list, so as to make
3958 subsequent searches for it cheaper. */
3959static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3960{
3961 RCEC *ec0, *ec1, *ec2;
3962 if (ec == *headp)
3963 tl_assert(0); /* already at head of list */
3964 tl_assert(ec != NULL);
3965 ec0 = *headp;
3966 ec1 = NULL;
3967 ec2 = NULL;
3968 while (True) {
3969 if (ec0 == NULL || ec0 == ec) break;
3970 ec2 = ec1;
3971 ec1 = ec0;
3972 ec0 = ec0->next;
3973 }
3974 tl_assert(ec0 == ec);
3975 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3976 RCEC* tmp;
3977 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3978 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3979 closer to the start of the list. */
3980 tl_assert(ec2->next == ec1);
3981 tl_assert(ec1->next == ec0);
3982 tmp = ec0->next;
3983 ec2->next = ec0;
3984 ec0->next = ec1;
3985 ec1->next = tmp;
3986 }
3987 else
3988 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3989 /* it's second in the list. */
3990 tl_assert(*headp == ec1);
3991 tl_assert(ec1->next == ec0);
3992 ec1->next = ec0->next;
3993 ec0->next = ec1;
3994 *headp = ec0;
3995 }
3996}
3997
3998
3999/* Find the given RCEC in the hash table, and return a pointer to it. Or,
4000 if not present, add the given one to the tree (by making a copy of
4001 it, so the caller can immediately deallocate the original) and
4002 return a pointer to the copy. The caller can safely have 'example'
4003 on its stack, since we will always return a pointer to a copy of
4004 it, not to the original. Note that the inserted node will have .rc
4005 of zero and so the caller must immediately increment it. */
4006__attribute__((noinline))
4007static RCEC* ctxt__find_or_add ( RCEC* example )
4008{
4009 UWord hent;
4010 RCEC* copy;
4011 tl_assert(example && example->magic == RCEC_MAGIC);
4012 tl_assert(example->rc == 0);
4013
4014 /* Search the hash table to see if we already have it. */
4015 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004016 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004017 copy = contextTab[hent];
4018 while (1) {
4019 if (!copy) break;
4020 tl_assert(copy->magic == RCEC_MAGIC);
4021 stats__ctxt_tab_cmps++;
4022 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4023 copy = copy->next;
4024 }
4025
4026 if (copy) {
4027 tl_assert(copy != example);
4028 /* optimisation: if it's not at the head of its list, move 1
4029 step fwds, to make future searches cheaper */
4030 if (copy != contextTab[hent]) {
4031 move_RCEC_one_step_forward( &contextTab[hent], copy );
4032 }
4033 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004034 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004035 tl_assert(copy != example);
4036 *copy = *example;
4037 copy->next = contextTab[hent];
4038 contextTab[hent] = copy;
4039 stats__ctxt_tab_curr++;
4040 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4041 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4042 }
4043 return copy;
4044}
4045
4046static inline UWord ROLW ( UWord w, Int n )
4047{
4048 Int bpw = 8 * sizeof(UWord);
4049 w = (w << n) | (w >> (bpw-n));
4050 return w;
4051}
4052
4053__attribute__((noinline))
4054static RCEC* get_RCEC ( Thr* thr )
4055{
4056 UWord hash, i;
4057 RCEC example;
4058 example.magic = RCEC_MAGIC;
4059 example.rc = 0;
4060 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00004061 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004062 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004063 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004064 hash ^= example.frames[i];
4065 hash = ROLW(hash, 19);
4066 }
njn6c83d5e2009-05-05 23:46:24 +00004067 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004068 return ctxt__find_or_add( &example );
4069}
4070
4071///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004072//// Part (2):
4073/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004074///
4075
4076// (UInt) `echo "Old Reference Information" | md5sum`
4077#define OldRef_MAGIC 0x30b1f075UL
4078
sewardjffce8152011-06-24 10:09:41 +00004079/* Records an access: the accessing thread, a stack trace (RCEC), the
4080 access size and writeness, and the set of locks held in write mode.
4081 The size (1,2,4,8) is encoded in szLg2B as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004082*/
sewardjffce8152011-06-24 10:09:41 +00004083typedef
4084 struct {
4085 RCEC* rcec;
4086 WordSetID locksHeldW;
4087 UInt thrid : SCALARTS_N_THRBITS;
4088 UInt szLg2B : 2;
4089 UInt isW : 1;
4090 }
4091 Thr_n_RCEC;
sewardjf98e1c02008-10-25 16:22:41 +00004092
sewardj849b0ed2008-12-21 10:43:10 +00004093#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004094
4095typedef
4096 struct {
sewardjd86e3a22008-12-03 11:39:37 +00004097 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004098 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00004099 /* or free list when not in use */
sewardjffce8152011-06-24 10:09:41 +00004100 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004101 Thr_n_RCEC accs[N_OLDREF_ACCS];
4102 }
4103 OldRef;
4104
sewardjd86e3a22008-12-03 11:39:37 +00004105
philippe6643e962012-01-17 21:16:30 +00004106//////////// BEGIN OldRef pool allocator
4107static PoolAlloc* oldref_pool_allocator;
sewardjd86e3a22008-12-03 11:39:37 +00004108
4109static OldRef* alloc_OldRef ( void ) {
philippe6643e962012-01-17 21:16:30 +00004110 return VG_(allocEltPA) ( oldref_pool_allocator );
sewardjd86e3a22008-12-03 11:39:37 +00004111}
4112
4113static void free_OldRef ( OldRef* r ) {
4114 tl_assert(r->magic == OldRef_MAGIC);
philippe6643e962012-01-17 21:16:30 +00004115 VG_(freeEltPA)( oldref_pool_allocator, r );
sewardjd86e3a22008-12-03 11:39:37 +00004116}
philippe6643e962012-01-17 21:16:30 +00004117//////////// END OldRef pool allocator
sewardjd86e3a22008-12-03 11:39:37 +00004118
sewardjd86e3a22008-12-03 11:39:37 +00004119
sewardjbc307e52008-12-06 22:10:54 +00004120static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
4121static UWord oldrefGen = 0; /* current LRU generation # */
4122static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
4123static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00004124
sewardj1669cc72008-12-13 01:20:21 +00004125inline static UInt min_UInt ( UInt a, UInt b ) {
4126 return a < b ? a : b;
4127}
4128
sewardja781be62008-12-08 00:12:28 +00004129/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4130 first interval is lower, 1 if the first interval is higher, and 0
4131 if there is any overlap. The redundant casting is paranoia,
4132 added after what looked distinctly like a bug in gcc-4.1.2, in
4133 which some of the comparisons were done signedly instead of
4134 unsignedly. */
4135/* Copied from exp-ptrcheck/sg_main.c */
4136static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4137 Addr a2, SizeT n2 ) {
4138 UWord a1w = (UWord)a1;
4139 UWord n1w = (UWord)n1;
4140 UWord a2w = (UWord)a2;
4141 UWord n2w = (UWord)n2;
4142 tl_assert(n1w > 0 && n2w > 0);
4143 if (a1w + n1w <= a2w) return -1L;
4144 if (a2w + n2w <= a1w) return 1L;
4145 return 0;
4146}
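/* For example: cmp_nonempty_intervals(100,4, 104,2) == -1, since
   [100,104) and [104,106) are disjoint with the first lower;
   (100,4, 96,4) == 1; and (100,4, 102,8) == 0, since [100,104) and
   [102,110) overlap. */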
4147
sewardjc5ea9962008-12-07 01:41:46 +00004148static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004149{
sewardjd86e3a22008-12-03 11:39:37 +00004150 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004151 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004152 Word i, j;
4153 UWord keyW, valW;
4154 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004155
sewardjffce8152011-06-24 10:09:41 +00004156 tl_assert(thr);
4157 ThrID thrid = thr->thrid;
4158 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4159
4160 WordSetID locksHeldW = thr->hgthread->locksetW;
4161
sewardjc5ea9962008-12-07 01:41:46 +00004162 rcec = get_RCEC( thr );
4163 ctxt__rcinc(rcec);
4164
sewardjffce8152011-06-24 10:09:41 +00004165 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004166 switch (szB) {
4167 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004168 case 1: szLg2B = 0; break;
4169 case 2: szLg2B = 1; break;
4170 case 4: szLg2B = 2; break;
4171 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004172 default: tl_assert(0);
4173 }
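   /* So for instance szB == 4 is stored as szLg2B == 2, and recovered
      later (in libhb_event_map_lookup) as 1 << 2. */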
4174
sewardjffce8152011-06-24 10:09:41 +00004175 /* Look in the map to see if we already have a record for this
4176 address. */
sewardjbc307e52008-12-06 22:10:54 +00004177 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004178
sewardjd86e3a22008-12-03 11:39:37 +00004179 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004180
4181 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004182 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004183 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004184 tl_assert(keyW == a);
4185 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004186 tl_assert(ref->magic == OldRef_MAGIC);
4187
sewardjf98e1c02008-10-25 16:22:41 +00004188 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004189 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004190 continue;
sewardjffce8152011-06-24 10:09:41 +00004191 if (ref->accs[i].szLg2B != szLg2B)
4192 continue;
4193 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004194 continue;
4195 /* else we have a match, so stop looking. */
4196 break;
sewardjf98e1c02008-10-25 16:22:41 +00004197 }
4198
4199 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004200 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004201 if (i > 0) {
4202 Thr_n_RCEC tmp = ref->accs[i-1];
4203 ref->accs[i-1] = ref->accs[i];
4204 ref->accs[i] = tmp;
4205 i--;
4206 }
sewardjc5ea9962008-12-07 01:41:46 +00004207 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004208 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004209 ctxt__rcdec( ref->accs[i].rcec );
4210 tl_assert(ref->accs[i].thrid == thrid);
4211 /* Update the RCEC and the W-held lockset. */
4212 ref->accs[i].rcec = rcec;
4213 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004214 } else {
sewardjffce8152011-06-24 10:09:41 +00004215 /* No entry for this (thread, R/W, size, nWHeld) quad.
4216 Shuffle all of them down one slot, and put the new entry
4217 at the start of the array. */
4218 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004219 /* the last slot is in use. We must dec the rc on the
4220 associated rcec. */
4221 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4222 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004223 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4224 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004225 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004226 } else {
4227 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4228 }
4229 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4230 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004231 ref->accs[0].thrid = thrid;
4232 ref->accs[0].szLg2B = szLg2B;
4233 ref->accs[0].isW = (UInt)(isW & 1);
4234 ref->accs[0].locksHeldW = locksHeldW;
4235 ref->accs[0].rcec = rcec;
4236 /* thrid==0 is used to signify an empty slot, so we can't
4237 add zero thrid (such a ThrID is invalid anyway). */
4238 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004239 }
4240
4241 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00004242
4243 } else {
4244
4245 /* We don't have a record for this address. Create a new one. */
4246 if (oldrefTreeN >= oldrefGenIncAt) {
4247 oldrefGen++;
4248 oldrefGenIncAt = oldrefTreeN + 50000;
4249 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
4250 oldrefGen, oldrefTreeN );
4251 }
sewardjd86e3a22008-12-03 11:39:37 +00004252
4253 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00004254 ref->magic = OldRef_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00004255 ref->gen = oldrefGen;
4256 ref->accs[0].thrid = thrid;
4257 ref->accs[0].szLg2B = szLg2B;
4258 ref->accs[0].isW = (UInt)(isW & 1);
4259 ref->accs[0].locksHeldW = locksHeldW;
4260 ref->accs[0].rcec = rcec;
4261
4262 /* thrid==0 is used to signify an empty slot, so we can't
4263 add zero thrid (such a ThrID is invalid anyway). */
4264 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4265
4266 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004267 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004268 ref->accs[j].rcec = NULL;
4269 ref->accs[j].thrid = 0;
4270 ref->accs[j].szLg2B = 0;
4271 ref->accs[j].isW = 0;
4272 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004273 }
sewardjbc307e52008-12-06 22:10:54 +00004274 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00004275 oldrefTreeN++;
4276
4277 }
4278}
4279
4280
sewardjffce8152011-06-24 10:09:41 +00004281/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004282Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004283 /*OUT*/Thr** resThr,
4284 /*OUT*/SizeT* resSzB,
4285 /*OUT*/Bool* resIsW,
4286 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004287 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004288{
sewardja781be62008-12-08 00:12:28 +00004289 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004290 OldRef* ref;
4291 UWord keyW, valW;
4292 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004293
sewardjffce8152011-06-24 10:09:41 +00004294 ThrID cand_thrid;
4295 RCEC* cand_rcec;
4296 Bool cand_isW;
4297 SizeT cand_szB;
4298 WordSetID cand_locksHeldW;
4299 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004300
4301 Addr toCheck[15];
4302 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004303
4304 tl_assert(thr);
4305 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004306
sewardjffce8152011-06-24 10:09:41 +00004307 ThrID thrid = thr->thrid;
4308
sewardja781be62008-12-08 00:12:28 +00004309 toCheck[nToCheck++] = a;
4310 for (i = -7; i < (Word)szB; i++) {
4311 if (i != 0)
4312 toCheck[nToCheck++] = a + i;
4313 }
4314 tl_assert(nToCheck <= 15);
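   /* Rationale: recorded accesses are at most 8 bytes, so any old
      access overlapping [a, a+szB) must start in [a-7, a+szB), giving
      at most 15 candidate start addresses when szB == 8. */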
4315
4316 /* Now see if we can find a suitable matching event for
4317 any of the addresses in toCheck[0 .. nToCheck-1]. */
4318 for (j = 0; j < nToCheck; j++) {
4319
4320 cand_a = toCheck[j];
4321 // VG_(printf)("test %ld %p\n", j, cand_a);
4322
4323 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
4324 if (!b)
4325 continue;
4326
sewardjd86e3a22008-12-03 11:39:37 +00004327 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00004328 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00004329 tl_assert(ref->magic == OldRef_MAGIC);
sewardjffce8152011-06-24 10:09:41 +00004330 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004331
sewardjffce8152011-06-24 10:09:41 +00004332 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4333 cand_rcec = NULL;
4334 cand_isW = False;
4335 cand_szB = 0;
4336 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004337
sewardjc5ea9962008-12-07 01:41:46 +00004338 for (i = 0; i < N_OLDREF_ACCS; i++) {
4339 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004340 cand_rcec = cand->rcec;
4341 cand_thrid = cand->thrid;
4342 cand_isW = (Bool)cand->isW;
4343 cand_szB = 1 << cand->szLg2B;
4344 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004345
sewardjffce8152011-06-24 10:09:41 +00004346 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004347 /* This slot isn't in use. Ignore it. */
4348 continue;
4349
sewardjffce8152011-06-24 10:09:41 +00004350 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004351 /* This is an access by the same thread, but we're only
4352 interested in accesses from other threads. Ignore. */
4353 continue;
4354
4355 if ((!cand_isW) && (!isW))
4356 /* We don't want to report a read racing against another
4357 read; that's stupid. So in this case move on. */
4358 continue;
4359
sewardja781be62008-12-08 00:12:28 +00004360 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4361 /* No overlap with the access we're asking about. Ignore. */
4362 continue;
4363
sewardjc5ea9962008-12-07 01:41:46 +00004364 /* We have a match. Stop searching. */
4365 break;
4366 }
4367
4368 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4369
sewardja781be62008-12-08 00:12:28 +00004370 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004371 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004372 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004373 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004374 tl_assert(cand_rcec);
4375 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4376 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004377 /* Count how many non-zero frames we have. */
4378 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4379 for (n = 0; n < maxNFrames; n++) {
4380 if (0 == cand_rcec->frames[n]) break;
4381 }
sewardjffce8152011-06-24 10:09:41 +00004382 *resEC = VG_(make_ExeContext_from_StackTrace)
4383 (cand_rcec->frames, n);
4384 *resThr = Thr__from_ThrID(cand_thrid);
4385 *resSzB = cand_szB;
4386 *resIsW = cand_isW;
4387 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004388 return True;
4389 }
sewardjc5ea9962008-12-07 01:41:46 +00004390
sewardja781be62008-12-08 00:12:28 +00004391 /* consider next address in toCheck[] */
4392 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004393
sewardja781be62008-12-08 00:12:28 +00004394 /* really didn't find anything. */
4395 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004396}
4397
4398static void event_map_init ( void )
4399{
4400 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004401
philippe6643e962012-01-17 21:16:30 +00004402 /* Context (RCEC) pool allocator */
4403 rcec_pool_allocator = VG_(newPA) (
4404 sizeof(RCEC),
4405 1000 /* RCECs per pool */,
4406 HG_(zalloc),
4407 "libhb.event_map_init.1 (RCEC pools)",
4408 HG_(free)
4409 );
sewardjd86e3a22008-12-03 11:39:37 +00004410
4411 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004412 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004413 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004414 N_RCEC_TAB * sizeof(RCEC*) );
4415 tl_assert(contextTab);
4416 for (i = 0; i < N_RCEC_TAB; i++)
4417 contextTab[i] = NULL;
4418
philippe6643e962012-01-17 21:16:30 +00004419 /* Oldref pool allocator */
4420 oldref_pool_allocator = VG_(newPA)(
4421 sizeof(OldRef),
4422 1000 /* OldRefs per pool */,
4423 HG_(zalloc),
4424 "libhb.event_map_init.3 (OldRef pools)",
4425 HG_(free)
4426 );
sewardjd86e3a22008-12-03 11:39:37 +00004427
sewardjd86e3a22008-12-03 11:39:37 +00004428 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004429 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004430 oldrefTree = VG_(newSWA)(
4431 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004432 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004433 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004434 );
4435 tl_assert(oldrefTree);
4436
4437 oldrefGen = 0;
4438 oldrefGenIncAt = 0;
4439 oldrefTreeN = 0;
4440}
4441
4442static void event_map__check_reference_counts ( Bool before )
4443{
4444 RCEC* rcec;
4445 OldRef* oldref;
4446 Word i;
4447 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004448 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004449
4450 /* Set the 'check' reference counts to zero. Also, optionally
4451 check that the real reference counts are non-zero. We allow
4452 these to fall to zero before a GC, but the GC must get rid of
4453 all those that are zero, hence none should be zero after a
4454 GC. */
4455 for (i = 0; i < N_RCEC_TAB; i++) {
4456 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4457 nEnts++;
4458 tl_assert(rcec);
4459 tl_assert(rcec->magic == RCEC_MAGIC);
4460 if (!before)
4461 tl_assert(rcec->rc > 0);
4462 rcec->rcX = 0;
4463 }
4464 }
4465
4466 /* check that the stats are sane */
4467 tl_assert(nEnts == stats__ctxt_tab_curr);
4468 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4469
4470 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004471 VG_(initIterSWA)( oldrefTree );
4472 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004473 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004474 tl_assert(oldref->magic == OldRef_MAGIC);
4475 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004476 ThrID aThrID = oldref->accs[i].thrid;
4477 RCEC* aRef = oldref->accs[i].rcec;
4478 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004479 tl_assert(aRef);
4480 tl_assert(aRef->magic == RCEC_MAGIC);
4481 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004482 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004483 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004484 }
4485 }
4486 }
4487
4488 /* compare check ref counts with actual */
4489 for (i = 0; i < N_RCEC_TAB; i++) {
4490 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4491 tl_assert(rcec->rc == rcec->rcX);
4492 }
4493 }
4494}
4495
sewardj8fd92d32008-11-20 23:17:01 +00004496__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00004497static void event_map_maybe_GC ( void )
4498{
4499 OldRef* oldref;
4500 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00004501 XArray* refs2del;
4502 Word i, j, n2del;
4503
sewardj8fd92d32008-11-20 23:17:01 +00004504 UWord* genMap = NULL;
4505 UWord genMap_min = 0;
4506 UWord genMap_size = 0;
4507
sewardj849b0ed2008-12-21 10:43:10 +00004508 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00004509 return;
4510
4511 if (0)
4512 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
4513
sewardj849b0ed2008-12-21 10:43:10 +00004514 /* Check for sane command line params. Limit values must match
4515 those in hg_process_cmd_line_option. */
4516 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
sewardjf585e482009-08-16 22:52:29 +00004517 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
sewardj849b0ed2008-12-21 10:43:10 +00004518
sewardj8f5374e2008-12-07 11:40:17 +00004519 /* Check our counting is sane (expensive) */
4520 if (CHECK_CEM)
4521 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00004522
sewardj8f5374e2008-12-07 11:40:17 +00004523 /* Check the reference counts (expensive) */
4524 if (CHECK_CEM)
4525 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004526
sewardj8fd92d32008-11-20 23:17:01 +00004527 /* Compute the distribution of generation values in the ref tree.
4528 There are likely only to be a few different generation numbers
4529 in the whole tree, but we don't know what they are. Hence use a
4530 dynamically resized array of counters. The array is genMap[0
4531 .. genMap_size-1], where genMap[0] is the count for the
4532 generation number genMap_min, genMap[1] is the count for
4533 genMap_min+1, etc. If a new number is seen outside the range
4534 [genMap_min .. genMap_min + genMap_size - 1] then the array is
4535 copied into a larger array, and genMap_min and genMap_size are
4536 adjusted accordingly. */
4537
sewardjf98e1c02008-10-25 16:22:41 +00004538 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00004539
sewardjbc307e52008-12-06 22:10:54 +00004540 VG_(initIterSWA)( oldrefTree );
4541 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00004542
sewardjd86e3a22008-12-03 11:39:37 +00004543 UWord ea, key;
4544 oldref = (OldRef*)valW;
4545 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00004546
4547 /* BEGIN find 'ea', which is the index in genMap holding the
4548 count for generation number 'key'. */
4549 if (UNLIKELY(genMap == NULL)) {
4550 /* deal with the first key to be seen, so that the following
4551 cases don't need to handle the complexity of a NULL count
4552 array. */
4553 genMap_min = key;
4554 genMap_size = 1;
4555 genMap = HG_(zalloc)( "libhb.emmG.1a",
4556 genMap_size * sizeof(UWord) );
4557 ea = 0;
4558 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
 4559                            key, genMap_min, genMap_min + genMap_size - 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004560 }
sewardj8fd92d32008-11-20 23:17:01 +00004561 else
4562 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
4563 /* this is the expected (almost-always-happens) case: 'key'
4564 is already mapped in the array. */
4565 ea = key - genMap_min;
4566 }
4567 else
4568 if (key < genMap_min) {
4569 /* 'key' appears before the start of the current array.
4570 Extend the current array by allocating a larger one and
4571 copying the current one to the upper end of it. */
4572 Word more;
4573 UWord* map2;
4574 more = genMap_min - key;
4575 tl_assert(more > 0);
4576 map2 = HG_(zalloc)( "libhb.emmG.1b",
4577 (genMap_size + more) * sizeof(UWord) );
4578 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
4579 HG_(free)( genMap );
4580 genMap = map2;
4581 genMap_size += more;
4582 genMap_min -= more;
4583 ea = 0;
4584 tl_assert(genMap_min == key);
4585 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
 4586                               key, genMap_min, genMap_min + genMap_size - 1 );
4587 }
4588 else {
4589 /* 'key' appears after the end of the current array. Extend
4590 the current array by allocating a larger one and copying
4591 the current one to the lower end of it. */
4592 Word more;
4593 UWord* map2;
4594 tl_assert(key >= genMap_min + genMap_size);
4595 more = key - (genMap_min + genMap_size) + 1;
4596 tl_assert(more > 0);
4597 map2 = HG_(zalloc)( "libhb.emmG.1c",
4598 (genMap_size + more) * sizeof(UWord) );
4599 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4600 HG_(free)( genMap );
4601 genMap = map2;
4602 genMap_size += more;
 4603          ea = genMap_size - 1;
4604 tl_assert(genMap_min + genMap_size - 1 == key);
4605 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
 4606                               key, genMap_min, genMap_min + genMap_size - 1 );
4607 }
4608 /* END find 'ea' from 'key' */
4609
4610 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00004611 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00004612 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00004613 }
4614
sewardj8fd92d32008-11-20 23:17:01 +00004615 tl_assert(genMap);
4616 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00004617
sewardj8fd92d32008-11-20 23:17:01 +00004618 /* Sanity check what we just computed */
4619 { UWord sum = 0;
4620 for (i = 0; i < genMap_size; i++) {
4621 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
4622 i + genMap_min, genMap[i] );
4623 sum += genMap[i];
4624 }
4625 tl_assert(sum == oldrefTreeN);
4626 }
4627
4628 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00004629 retained = oldrefTreeN;
4630 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00004631
4632 for (i = 0; i < genMap_size; i++) {
4633 keyW = i + genMap_min;
4634 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00004635 tl_assert(keyW > 0); /* can't allow a generation # 0 */
4636 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
4637 tl_assert(keyW >= maxGen);
4638 tl_assert(retained >= valW);
4639 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00004640 > (UWord)(HG_(clo_conflict_cache_size)
4641 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00004642 retained -= valW;
4643 maxGen = keyW;
4644 } else {
4645 break;
4646 }
4647 }
sewardjf98e1c02008-10-25 16:22:41 +00004648
sewardj8fd92d32008-11-20 23:17:01 +00004649 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00004650
sewardj9b1f0fd2008-11-18 23:40:00 +00004651 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00004652
4653 /* Now make up a big list of the oldrefTree entries we want to
4654 delete. We can't simultaneously traverse the tree and delete
4655 stuff from it, so first we need to copy them off somewhere
4656 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00004657 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00004658 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00004659
sewardj9b1f0fd2008-11-18 23:40:00 +00004660 if (retained < oldrefTreeN) {
4661
4662 /* This is the normal (expected) case. We discard any ref whose
4663 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00004664 VG_(initIterSWA)( oldrefTree );
4665 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004666 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004667 tl_assert(oldref->magic == OldRef_MAGIC);
4668 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00004669 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004670 }
sewardjf98e1c02008-10-25 16:22:41 +00004671 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004672 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004673 VG_(message)(Vg_DebugMsg,
4674 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00004675 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004676 maxGen, retained );
4677 }
4678
4679 } else {
4680
4681 static UInt rand_seed = 0; /* leave as static */
4682
4683 /* Degenerate case: there's only one generation in the entire
4684 tree, so we need to have some other way of deciding which
4685 refs to throw away. Just throw out half of them randomly. */
4686 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00004687 VG_(initIterSWA)( oldrefTree );
4688 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004689 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00004690 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004691 tl_assert(oldref->magic == OldRef_MAGIC);
4692 n = VG_(random)( &rand_seed );
4693 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00004694 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004695 retained--;
4696 }
4697 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004698 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004699 VG_(message)(Vg_DebugMsg,
4700 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00004701 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004702 retained );
4703 }
4704
sewardjf98e1c02008-10-25 16:22:41 +00004705 }
4706
4707 n2del = VG_(sizeXA)( refs2del );
4708 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4709
4710 if (0) VG_(printf)("%s","deleting entries\n");
4711 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00004712 Bool b;
4713 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00004714 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00004715 tl_assert(b);
4716 tl_assert(keyW == ga2del);
4717 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004718 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004719 ThrID aThrID = oldref->accs[j].thrid;
4720 RCEC* aRef = oldref->accs[j].rcec;
sewardjc5ea9962008-12-07 01:41:46 +00004721 if (aRef) {
sewardjffce8152011-06-24 10:09:41 +00004722 tl_assert(aThrID != 0);
sewardjf98e1c02008-10-25 16:22:41 +00004723 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004724 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004725 } else {
sewardjffce8152011-06-24 10:09:41 +00004726 tl_assert(aThrID == 0);
sewardjf98e1c02008-10-25 16:22:41 +00004727 }
4728 }
sewardjd86e3a22008-12-03 11:39:37 +00004729
4730 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004731 }
4732
4733 VG_(deleteXA)( refs2del );
4734
sewardjc5ea9962008-12-07 01:41:46 +00004735 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004736
4737 oldrefTreeN = retained;
4738 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4739
4740 /* Throw away all RCECs with zero reference counts */
4741 for (i = 0; i < N_RCEC_TAB; i++) {
4742 RCEC** pp = &contextTab[i];
4743 RCEC* p = *pp;
4744 while (p) {
4745 if (p->rc == 0) {
4746 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004747 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004748 p = *pp;
4749 tl_assert(stats__ctxt_tab_curr > 0);
4750 stats__ctxt_tab_curr--;
4751 } else {
4752 pp = &p->next;
4753 p = p->next;
4754 }
4755 }
4756 }
4757
sewardj8f5374e2008-12-07 11:40:17 +00004758 /* Check the reference counts (expensive) */
4759 if (CHECK_CEM)
4760 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004761
4762 //if (0)
4763 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4764 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4765
4766}
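
/* The generation histogram in event_map_maybe_GC grows in both
   directions as new generation numbers are seen.  A compact,
   self-contained sketch of that technique (plain libc allocation
   instead of HG_(zalloc), so it can be read in isolation): */
#if 0
#include <stdlib.h>
#include <string.h>
#include <assert.h>

static unsigned long* hist      = NULL; /* hist[0] counts value hist_lo */
static unsigned long  hist_lo   = 0;
static unsigned long  hist_size = 0;

static void hist_count ( unsigned long key )
{
   unsigned long ea;
   if (hist == NULL) {
      /* first key seen: start with a one-slot histogram */
      hist_lo = key;  hist_size = 1;
      hist = calloc(1, sizeof(unsigned long));
      ea = 0;
   }
   else if (key >= hist_lo && key < hist_lo + hist_size) {
      ea = key - hist_lo;              /* common case: already in range */
   }
   else if (key < hist_lo) {
      /* grow downwards: copy the old counts to the top of a bigger,
         zeroed array */
      unsigned long  more = hist_lo - key;
      unsigned long* h2   = calloc(hist_size + more, sizeof(unsigned long));
      memcpy(&h2[more], hist, hist_size * sizeof(unsigned long));
      free(hist);  hist = h2;
      hist_size += more;  hist_lo -= more;
      ea = 0;
   }
   else {
      /* grow upwards: copy the old counts to the bottom */
      unsigned long  more = key - (hist_lo + hist_size) + 1;
      unsigned long* h2   = calloc(hist_size + more, sizeof(unsigned long));
      memcpy(&h2[0], hist, hist_size * sizeof(unsigned long));
      free(hist);  hist = h2;
      hist_size += more;
      ea = hist_size - 1;
   }
   assert(ea < hist_size);
   hist[ea]++;
}
#endif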
4767
4768
4769/////////////////////////////////////////////////////////
4770// //
4771// Core MSM //
4772// //
4773/////////////////////////////////////////////////////////
4774
sewardj23f12002009-07-24 08:45:08 +00004775/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4776 Nov 08, and again after [...],
4777 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004778
sewardj23f12002009-07-24 08:45:08 +00004779static ULong stats__msmcread = 0;
4780static ULong stats__msmcread_change = 0;
4781static ULong stats__msmcwrite = 0;
4782static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004783
sewardj8ab2c132009-08-02 09:34:35 +00004784/* Some notes on the H1 history mechanism:
4785
4786 Transition rules are:
4787
4788 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4789 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4790
4791 After any access by a thread T to a location L, L's constraint pair
4792 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4793
4794 After a race by thread T conflicting with some previous access by
4795 some other thread U, for a location with constraint (before
4796 processing the later access) (Cr,Cw), then Cw[U] is the segment in
 4797   which the previous access lies.
4798
4799 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4800 are compared so as to find out which thread(s) this access
4801 conflicts with. Once that is established, we also require the
4802 pre-update Cw for the location, so we can index into it for those
4803 threads, to get the scalar clock values for the point at which the
 4804   former accesses were made.  (In fact we only bother to do any of
 4805   this for one arbitrarily chosen conflicting thread: that's
 4806   simpler, it avoids flooding the user with vast amounts of mostly
 4807   useless information, and, since the program is wrong if it
 4808   contains any races at all, we don't really need to show every
 4809   conflicting access pair initially, so long as we show none
 4810   only when none exist).
4811
4812 ---
4813
4814 That requires the auxiliary proof that
4815
4816 (Cr `join` Kw)[T] == Kw[T]
4817
4818 Why should that be true? Because for any thread T, Kw[T] >= the
4819 scalar clock value for T known by any other thread. In other
4820 words, because T's value for its own scalar clock is at least as up
4821 to date as the value for it known by any other thread (that is true
4822 for both the R- and W- scalar clocks). Hence no other thread will
4823 be able to feed in a value for that element (indirectly via a
4824 constraint) which will exceed Kw[T], and hence the join cannot
4825 cause that particular element to advance.
4826*/
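
/* An illustrative model (not the tool's representation: real VtsIDs
   are interned, refcounted indices) of the constraint updates that
   msmcread/msmcwrite below actually perform, with vector clocks as
   small arrays and `join` as element-wise max: */
#if 0
#define EX_NTHR 3
typedef struct { ULong t[EX_NTHR]; } ExVC;

static ExVC ex_join ( ExVC a, ExVC b ) {      /* element-wise max */
   ExVC r; Word i;
   for (i = 0; i < EX_NTHR; i++)
      r.t[i] = a.t[i] > b.t[i] ? a.t[i] : b.t[i];
   return r;
}

/* Read (no-race case of msmcread): R-constraint unchanged, the
   W-constraint absorbs the reader's W-clock. */
static void ex_read ( ExVC* Cr, ExVC* Cw, ExVC Kw ) {
   (void)Cr;                  /* Cr stays as it is */
   *Cw = ex_join(*Cw, Kw);
}

/* Write (no-race case of msmcwrite): both constraints collapse to
   the writer's W-clock. */
static void ex_write ( ExVC* Cr, ExVC* Cw, ExVC Kw ) {
   *Cr = Kw;
   *Cw = Kw;
}
#endif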
4827
sewardjf98e1c02008-10-25 16:22:41 +00004828__attribute__((noinline))
4829static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004830 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004831 VtsID Cfailed,
4832 VtsID Kfailed,
4833 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004834{
sewardjc5ea9962008-12-07 01:41:46 +00004835 /* Call here to report a race. We just hand it onwards to
4836 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004837 error is going to be collected, then, at history_level 2, that
4838 queries the conflicting-event map. The alternative would be to
4839 query it right here. But that causes a lot of pointless queries
4840 for errors which will shortly be discarded as duplicates, and
4841 can become a performance overhead; so we defer the query until
4842 we know the error is not a duplicate. */
4843
4844 /* Stacks for the bounds of the (or one of the) conflicting
4845 segment(s). These are only set at history_level 1. */
4846 ExeContext* hist1_seg_start = NULL;
4847 ExeContext* hist1_seg_end = NULL;
4848 Thread* hist1_conf_thr = NULL;
4849
4850 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00004851 tl_assert(acc_thr->hgthread);
4852 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00004853 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4854
4855 if (HG_(clo_history_level) == 1) {
4856 Bool found;
4857 Word firstIx, lastIx;
4858 ULong_n_EC key;
4859
4860 /* At history_level 1, we must round up the relevant stack-pair
4861 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004862 deferring it is complex; we can't (easily) put Kfailed and
4863 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004864 getting tied up in difficulties with VtsID reference
4865 counting. So just do it now. */
4866 Thr* confThr;
4867 ULong confTym = 0;
4868 /* Which thread are we in conflict with? There may be more than
4869 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4870 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004871 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004872      /* This must exist, since if it were NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004873         conflict (semantics of the return value of
 4874         VtsID__findFirst_notLEQ), and msmc{read,write}, which has
 4875         called us, just checked exactly this -- that there was in
 4876         fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004877 tl_assert(confThr);
4878
4879 /* Get the scalar clock value that the conflicting thread
4880 introduced into the constraint. A careful examination of the
4881 base machine rules shows that this must be the same as the
4882 conflicting thread's scalar clock when it created this
4883 constraint. Hence we know the scalar clock of the
4884 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004885 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004886
4887 /* Using this scalar clock, index into the conflicting thread's
4888 collection of stack traces made each time its vector clock
4889 (hence its scalar clock) changed. This gives the stack
4890 traces at the start and end of the conflicting segment (well,
4891 as per comment just above, of one of the conflicting
4892 segments, if there are more than one). */
4893 key.ull = confTym;
4894 key.ec = NULL;
4895 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004896 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004897 firstIx = lastIx = 0;
4898 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004899 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004900 &key, &firstIx, &lastIx,
florian6bd9dc12012-11-23 16:17:43 +00004901 (XACmpFn_t)cmp__ULong_n_EC__by_ULong
sewardj23f12002009-07-24 08:45:08 +00004902 );
sewardj8ab2c132009-08-02 09:34:35 +00004903 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004904 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004905 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004906 confThr, confTym, found, firstIx, lastIx);
4907 /* We can't indefinitely collect stack traces at VTS
4908 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004909 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004910 ones, which in turn means we might fail to find index value
4911 confTym in the array. */
4912 if (found) {
4913 ULong_n_EC *pair_start, *pair_end;
4914 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004915 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004916 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004917 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004918 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004919 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004920 lastIx+1 );
4921 /* from properties of VG_(lookupXA) and the comparison fn used: */
4922 tl_assert(pair_start->ull < pair_end->ull);
4923 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004924 /* Could do a bit better here. It may be that pair_end
4925 doesn't have a stack, but the following entries in the
 4926               array have the same scalar Kw and do have a stack. So
4927 we should search a bit further along the array than
4928 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004929 } else {
sewardjffce8152011-06-24 10:09:41 +00004930 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00004931 hist1_seg_end = main_get_EC( confThr );
4932 }
4933 // seg_start could be NULL iff this is the first stack in the thread
4934 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4935 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00004936 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00004937 }
4938 }
4939
sewardj60626642011-03-10 15:14:37 +00004940 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004941 szB, isWrite,
4942 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004943}
4944
4945static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004946 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004947 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004948 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4949 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004950}
4951
4952
4953/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004954static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004955 /* The following are only needed for
4956 creating error reports. */
4957 Thr* acc_thr,
4958 Addr acc_addr, SizeT szB )
4959{
4960 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004961 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004962
4963 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004964 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004965 tl_assert(is_sane_SVal_C(svOld));
4966 }
4967
sewardj1c0ce7a2009-07-01 08:10:49 +00004968 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004969 VtsID tviR = acc_thr->viR;
4970 VtsID tviW = acc_thr->viW;
4971 VtsID rmini = SVal__unC_Rmin(svOld);
4972 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004973 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4974 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004975 /* no race */
4976 /* Note: RWLOCK subtlety: use tviW, not tviR */
4977 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4978 goto out;
4979 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004980 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004981 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4982 tl_assert(leqxx);
4983 // same as in non-race case
4984 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4985 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004986 rmini, /* Cfailed */
4987 tviR, /* Kfailed */
4988 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004989 goto out;
4990 }
4991 }
4992 if (SVal__isA(svOld)) {
4993 /* reading no-access memory (sigh); leave unchanged */
4994 /* check for no pollution */
4995 tl_assert(svOld == SVal_NOACCESS);
4996 svNew = SVal_NOACCESS;
4997 goto out;
4998 }
sewardj23f12002009-07-24 08:45:08 +00004999 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005000 tl_assert(0);
5001
5002 out:
sewardj8f5374e2008-12-07 11:40:17 +00005003 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005004 tl_assert(is_sane_SVal_C(svNew));
5005 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005006 if (UNLIKELY(svNew != svOld)) {
5007 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005008 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005009 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005010 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005011 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005012 }
5013 }
5014 return svNew;
5015}
5016
5017
5018/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005019static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005020 /* The following are only needed for
5021 creating error reports. */
5022 Thr* acc_thr,
5023 Addr acc_addr, SizeT szB )
5024{
5025 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005026 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005027
5028 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005029 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005030 tl_assert(is_sane_SVal_C(svOld));
5031 }
5032
sewardj1c0ce7a2009-07-01 08:10:49 +00005033 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005034 VtsID tviW = acc_thr->viW;
5035 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005036 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5037 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005038 /* no race */
5039 svNew = SVal__mkC( tviW, tviW );
5040 goto out;
5041 } else {
5042 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005043 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005044 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5045 tl_assert(leqxx);
5046 // same as in non-race case
5047 // proof: in the non-race case, we have
5048 // rmini <= wmini (invar on constraints)
5049 // tviW <= tviR (invar on thread clocks)
5050 // wmini <= tviW (from run-time check)
5051 // hence from transitivity of <= we have
5052 // rmini <= wmini <= tviW
5053 // and so join(rmini,tviW) == tviW
5054 // and join(wmini,tviW) == tviW
5055 // qed.
5056 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5057 VtsID__join2(wmini, tviW) );
5058 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005059 wmini, /* Cfailed */
5060 tviW, /* Kfailed */
5061 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005062 goto out;
5063 }
5064 }
5065 if (SVal__isA(svOld)) {
5066 /* writing no-access memory (sigh); leave unchanged */
5067 /* check for no pollution */
5068 tl_assert(svOld == SVal_NOACCESS);
5069 svNew = SVal_NOACCESS;
5070 goto out;
5071 }
sewardj23f12002009-07-24 08:45:08 +00005072 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005073 tl_assert(0);
5074
5075 out:
sewardj8f5374e2008-12-07 11:40:17 +00005076 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005077 tl_assert(is_sane_SVal_C(svNew));
5078 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005079 if (UNLIKELY(svNew != svOld)) {
5080 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005081 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005082 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005083 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005084 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005085 }
5086 }
5087 return svNew;
5088}
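
/* The "same as in non-race case" argument in msmcwrite reduces to a
   join identity over clocks.  A scalar sketch (join == max), checking
   the identity under exactly the hypotheses the comment states: */
#if 0
static ULong ex_join2 ( ULong a, ULong b ) {
   return a > b ? a : b;      /* scalar analogue of VtsID__join2 */
}

static void ex_check_write_identity ( ULong rmini, ULong wmini,
                                      ULong tviW ) {
   /* hypotheses: rmini <= wmini (constraint invariant) and
      wmini <= tviW (the non-race run-time check) */
   if (rmini <= wmini && wmini <= tviW) {
      tl_assert( ex_join2(rmini, tviW) == tviW );
      tl_assert( ex_join2(wmini, tviW) == tviW );
   }
}
#endif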
5089
5090
5091/////////////////////////////////////////////////////////
5092// //
5093// Apply core MSM to specific memory locations //
5094// //
5095/////////////////////////////////////////////////////////
5096
sewardj23f12002009-07-24 08:45:08 +00005097/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005098
sewardj23f12002009-07-24 08:45:08 +00005099static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005100 CacheLine* cl;
5101 UWord cloff, tno, toff;
5102 SVal svOld, svNew;
5103 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005104 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005105 cl = get_cacheline(a);
5106 cloff = get_cacheline_offset(a);
5107 tno = get_treeno(a);
5108 toff = get_tree_offset(a); /* == 0 .. 7 */
5109 descr = cl->descrs[tno];
5110 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5111 SVal* tree = &cl->svals[tno << 3];
5112 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005113 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005114 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5115 }
5116 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005117 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005118 if (CHECK_ZSM)
5119 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005120 cl->svals[cloff] = svNew;
5121}
5122
sewardj23f12002009-07-24 08:45:08 +00005123static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005124 CacheLine* cl;
5125 UWord cloff, tno, toff;
5126 SVal svOld, svNew;
5127 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005128 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005129 cl = get_cacheline(a);
5130 cloff = get_cacheline_offset(a);
5131 tno = get_treeno(a);
5132 toff = get_tree_offset(a); /* == 0 .. 7 */
5133 descr = cl->descrs[tno];
5134 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5135 SVal* tree = &cl->svals[tno << 3];
5136 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005137 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005138 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5139 }
5140 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005141 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005142 if (CHECK_ZSM)
5143 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005144 cl->svals[cloff] = svNew;
5145}
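
/* For reference, the address arithmetic the sapply routines rely on,
   written out flat.  The constants are illustrative only, assuming
   the usual 64-byte lines split into eight 8-byte trees; the real
   values come from N_LINE_ARANGE etc. defined earlier in this file. */
#if 0
static void example_decompose ( Addr a )
{
   UWord cloff = a & 63;       /* byte offset within the cache line  */
   UWord tno   = cloff >> 3;   /* which 8-byte tree, 0 .. 7          */
   UWord toff  = a & 7;        /* byte offset within that tree       */
   /* cl->svals[] is indexed by cloff, cl->descrs[] by tno; the
      (TREE_DESCR_8_0 << toff) test asks whether an 8-bit leaf is
      currently valid at that position, and pulldown_to_8 splits a
      coarser node until it is. */
   (void)cloff; (void)tno; (void)toff;
}
#endif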
5146
sewardj23f12002009-07-24 08:45:08 +00005147/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005148
sewardj23f12002009-07-24 08:45:08 +00005149static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005150 CacheLine* cl;
5151 UWord cloff, tno, toff;
5152 SVal svOld, svNew;
5153 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005154 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005155 if (UNLIKELY(!aligned16(a))) goto slowcase;
5156 cl = get_cacheline(a);
5157 cloff = get_cacheline_offset(a);
5158 tno = get_treeno(a);
5159 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5160 descr = cl->descrs[tno];
5161 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5162 if (valid_value_is_below_me_16(descr, toff)) {
5163 goto slowcase;
5164 } else {
5165 SVal* tree = &cl->svals[tno << 3];
5166 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5167 }
sewardj8f5374e2008-12-07 11:40:17 +00005168 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005169 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5170 }
5171 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005172 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005173 if (CHECK_ZSM)
5174 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005175 cl->svals[cloff] = svNew;
5176 return;
5177 slowcase: /* misaligned, or must go further down the tree */
5178 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005179 zsm_sapply08__msmcread( thr, a + 0 );
5180 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005181}
5182
sewardj23f12002009-07-24 08:45:08 +00005183static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005184 CacheLine* cl;
5185 UWord cloff, tno, toff;
5186 SVal svOld, svNew;
5187 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005188 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005189 if (UNLIKELY(!aligned16(a))) goto slowcase;
5190 cl = get_cacheline(a);
5191 cloff = get_cacheline_offset(a);
5192 tno = get_treeno(a);
5193 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5194 descr = cl->descrs[tno];
5195 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5196 if (valid_value_is_below_me_16(descr, toff)) {
5197 goto slowcase;
5198 } else {
5199 SVal* tree = &cl->svals[tno << 3];
5200 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5201 }
sewardj8f5374e2008-12-07 11:40:17 +00005202 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005203 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5204 }
5205 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005206 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005207 if (CHECK_ZSM)
5208 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005209 cl->svals[cloff] = svNew;
5210 return;
5211 slowcase: /* misaligned, or must go further down the tree */
5212 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005213 zsm_sapply08__msmcwrite( thr, a + 0 );
5214 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005215}
5216
sewardj23f12002009-07-24 08:45:08 +00005217/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005218
sewardj23f12002009-07-24 08:45:08 +00005219static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005220 CacheLine* cl;
5221 UWord cloff, tno, toff;
5222 SVal svOld, svNew;
5223 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005224 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005225 if (UNLIKELY(!aligned32(a))) goto slowcase;
5226 cl = get_cacheline(a);
5227 cloff = get_cacheline_offset(a);
5228 tno = get_treeno(a);
5229 toff = get_tree_offset(a); /* == 0 or 4 */
5230 descr = cl->descrs[tno];
5231 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5232 if (valid_value_is_above_me_32(descr, toff)) {
5233 SVal* tree = &cl->svals[tno << 3];
5234 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5235 } else {
5236 goto slowcase;
5237 }
sewardj8f5374e2008-12-07 11:40:17 +00005238 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005239 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5240 }
5241 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005242 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005243 if (CHECK_ZSM)
5244 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005245 cl->svals[cloff] = svNew;
5246 return;
5247 slowcase: /* misaligned, or must go further down the tree */
5248 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005249 zsm_sapply16__msmcread( thr, a + 0 );
5250 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005251}
5252
sewardj23f12002009-07-24 08:45:08 +00005253static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005254 CacheLine* cl;
5255 UWord cloff, tno, toff;
5256 SVal svOld, svNew;
5257 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005258 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005259 if (UNLIKELY(!aligned32(a))) goto slowcase;
5260 cl = get_cacheline(a);
5261 cloff = get_cacheline_offset(a);
5262 tno = get_treeno(a);
5263 toff = get_tree_offset(a); /* == 0 or 4 */
5264 descr = cl->descrs[tno];
5265 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5266 if (valid_value_is_above_me_32(descr, toff)) {
5267 SVal* tree = &cl->svals[tno << 3];
5268 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5269 } else {
5270 goto slowcase;
5271 }
sewardj8f5374e2008-12-07 11:40:17 +00005272 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5274 }
5275 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005276 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005277 if (CHECK_ZSM)
5278 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005279 cl->svals[cloff] = svNew;
5280 return;
5281 slowcase: /* misaligned, or must go further down the tree */
5282 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005283 zsm_sapply16__msmcwrite( thr, a + 0 );
5284 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005285}
5286
sewardj23f12002009-07-24 08:45:08 +00005287/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005288
sewardj23f12002009-07-24 08:45:08 +00005289static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005290 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005291 UWord cloff, tno;
5292 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005293 SVal svOld, svNew;
5294 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005295 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005296 if (UNLIKELY(!aligned64(a))) goto slowcase;
5297 cl = get_cacheline(a);
5298 cloff = get_cacheline_offset(a);
5299 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005300 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005301 descr = cl->descrs[tno];
5302 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5303 goto slowcase;
5304 }
5305 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005306 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005307 if (CHECK_ZSM)
5308 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005309 cl->svals[cloff] = svNew;
5310 return;
5311 slowcase: /* misaligned, or must go further down the tree */
5312 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005313 zsm_sapply32__msmcread( thr, a + 0 );
5314 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005315}
5316
sewardj23f12002009-07-24 08:45:08 +00005317static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005318 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005319 UWord cloff, tno;
5320 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005321 SVal svOld, svNew;
5322 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005323 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005324 if (UNLIKELY(!aligned64(a))) goto slowcase;
5325 cl = get_cacheline(a);
5326 cloff = get_cacheline_offset(a);
5327 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005328 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005329 descr = cl->descrs[tno];
5330 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5331 goto slowcase;
5332 }
5333 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005334 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005335 if (CHECK_ZSM)
5336 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005337 cl->svals[cloff] = svNew;
5338 return;
5339 slowcase: /* misaligned, or must go further down the tree */
5340 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005341 zsm_sapply32__msmcwrite( thr, a + 0 );
5342 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005343}
5344
sewardj23f12002009-07-24 08:45:08 +00005345/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005346
5347static
sewardj23f12002009-07-24 08:45:08 +00005348void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005349 CacheLine* cl;
5350 UWord cloff, tno, toff;
5351 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005352 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005353 cl = get_cacheline(a);
5354 cloff = get_cacheline_offset(a);
5355 tno = get_treeno(a);
5356 toff = get_tree_offset(a); /* == 0 .. 7 */
5357 descr = cl->descrs[tno];
5358 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5359 SVal* tree = &cl->svals[tno << 3];
5360 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005361 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005362 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5363 }
5364 tl_assert(svNew != SVal_INVALID);
5365 cl->svals[cloff] = svNew;
5366}
5367
sewardj23f12002009-07-24 08:45:08 +00005368/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005369
5370static
sewardj23f12002009-07-24 08:45:08 +00005371void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005372 CacheLine* cl;
5373 UWord cloff, tno, toff;
5374 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005375 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005376 if (UNLIKELY(!aligned16(a))) goto slowcase;
5377 cl = get_cacheline(a);
5378 cloff = get_cacheline_offset(a);
5379 tno = get_treeno(a);
5380 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5381 descr = cl->descrs[tno];
5382 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5383 if (valid_value_is_below_me_16(descr, toff)) {
5384 /* Writing at this level. Need to fix up 'descr'. */
5385 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5386 /* At this point, the tree does not match cl->descr[tno] any
5387 more. The assignments below will fix it up. */
5388 } else {
5389 /* We can't indiscriminately write on the w16 node as in the
5390 w64 case, as that might make the node inconsistent with
5391 its parent. So first, pull down to this level. */
5392 SVal* tree = &cl->svals[tno << 3];
5393 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005394 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005395 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5396 }
5397 }
5398 tl_assert(svNew != SVal_INVALID);
5399 cl->svals[cloff + 0] = svNew;
5400 cl->svals[cloff + 1] = SVal_INVALID;
5401 return;
5402 slowcase: /* misaligned */
5403 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005404 zsm_swrite08( a + 0, svNew );
5405 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005406}
5407
sewardj23f12002009-07-24 08:45:08 +00005408/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005409
5410static
sewardj23f12002009-07-24 08:45:08 +00005411void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005412 CacheLine* cl;
5413 UWord cloff, tno, toff;
5414 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005415 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005416 if (UNLIKELY(!aligned32(a))) goto slowcase;
5417 cl = get_cacheline(a);
5418 cloff = get_cacheline_offset(a);
5419 tno = get_treeno(a);
5420 toff = get_tree_offset(a); /* == 0 or 4 */
5421 descr = cl->descrs[tno];
5422 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5423 if (valid_value_is_above_me_32(descr, toff)) {
5424 /* We can't indiscriminately write on the w32 node as in the
5425 w64 case, as that might make the node inconsistent with
5426 its parent. So first, pull down to this level. */
5427 SVal* tree = &cl->svals[tno << 3];
5428 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005429 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005430 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5431 } else {
5432 /* Writing at this level. Need to fix up 'descr'. */
5433 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5434 /* At this point, the tree does not match cl->descr[tno] any
5435 more. The assignments below will fix it up. */
5436 }
5437 }
5438 tl_assert(svNew != SVal_INVALID);
5439 cl->svals[cloff + 0] = svNew;
5440 cl->svals[cloff + 1] = SVal_INVALID;
5441 cl->svals[cloff + 2] = SVal_INVALID;
5442 cl->svals[cloff + 3] = SVal_INVALID;
5443 return;
5444 slowcase: /* misaligned */
5445 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005446 zsm_swrite16( a + 0, svNew );
5447 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005448}
5449
sewardj23f12002009-07-24 08:45:08 +00005450/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005451
5452static
sewardj23f12002009-07-24 08:45:08 +00005453void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005454 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005455 UWord cloff, tno;
5456 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005457 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005458 if (UNLIKELY(!aligned64(a))) goto slowcase;
5459 cl = get_cacheline(a);
5460 cloff = get_cacheline_offset(a);
5461 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005462 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005463 cl->descrs[tno] = TREE_DESCR_64;
5464 tl_assert(svNew != SVal_INVALID);
5465 cl->svals[cloff + 0] = svNew;
5466 cl->svals[cloff + 1] = SVal_INVALID;
5467 cl->svals[cloff + 2] = SVal_INVALID;
5468 cl->svals[cloff + 3] = SVal_INVALID;
5469 cl->svals[cloff + 4] = SVal_INVALID;
5470 cl->svals[cloff + 5] = SVal_INVALID;
5471 cl->svals[cloff + 6] = SVal_INVALID;
5472 cl->svals[cloff + 7] = SVal_INVALID;
5473 return;
5474 slowcase: /* misaligned */
5475 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005476 zsm_swrite32( a + 0, svNew );
5477 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005478}
5479
sewardj23f12002009-07-24 08:45:08 +00005480/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005481
5482static
sewardj23f12002009-07-24 08:45:08 +00005483SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005484 CacheLine* cl;
5485 UWord cloff, tno, toff;
5486 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005487 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005488 cl = get_cacheline(a);
5489 cloff = get_cacheline_offset(a);
5490 tno = get_treeno(a);
5491 toff = get_tree_offset(a); /* == 0 .. 7 */
5492 descr = cl->descrs[tno];
5493 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5494 SVal* tree = &cl->svals[tno << 3];
5495 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5496 }
5497 return cl->svals[cloff];
5498}
5499
sewardj23f12002009-07-24 08:45:08 +00005500static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005501 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005502 stats__cline_scopy08s++;
5503 sv = zsm_sread08( src );
5504 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005505}
5506
5507
sewardj23f12002009-07-24 08:45:08 +00005508/* Block-copy states (needed for implementing realloc()). Note this
5509 doesn't change the filtering arrangements. The caller of
5510 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005511
sewardj23f12002009-07-24 08:45:08 +00005512static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005513{
5514 SizeT i;
5515 if (len == 0)
5516 return;
5517
5518 /* assert for non-overlappingness */
5519 tl_assert(src+len <= dst || dst+len <= src);
5520
5521 /* To be simple, just copy byte by byte. But so as not to wreck
5522 performance for later accesses to dst[0 .. len-1], normalise
5523 destination lines as we finish with them, and also normalise the
5524 line containing the first and last address. */
5525 for (i = 0; i < len; i++) {
5526 Bool normalise
5527 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5528 || i == 0 /* first in range */
5529 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005530 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005531 }
5532}
5533
5534
5535/* For setting address ranges to a given value. Has considerable
5536 sophistication so as to avoid generating large numbers of pointless
5537 cache loads/writebacks for large ranges. */
5538
5539/* Do small ranges in-cache, in the obvious way. */
5540static
sewardj23f12002009-07-24 08:45:08 +00005541void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005542{
5543 /* fast track a couple of common cases */
5544 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005545 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005546 return;
5547 }
5548 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005549 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005550 return;
5551 }
5552
5553 /* be completely general (but as efficient as possible) */
5554 if (len == 0) return;
5555
5556 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005557 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005558 a += 1;
5559 len -= 1;
5560 tl_assert(aligned16(a));
5561 }
5562 if (len == 0) return;
5563
5564 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005565 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005566 a += 2;
5567 len -= 2;
5568 tl_assert(aligned32(a));
5569 }
5570 if (len == 0) return;
5571
5572 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005573 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005574 a += 4;
5575 len -= 4;
5576 tl_assert(aligned64(a));
5577 }
5578 if (len == 0) return;
5579
5580 if (len >= 8) {
5581 tl_assert(aligned64(a));
5582 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005583 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005584 a += 8;
5585 len -= 8;
5586 }
5587 tl_assert(aligned64(a));
5588 }
5589 if (len == 0) return;
5590
5591 if (len >= 4)
5592 tl_assert(aligned32(a));
5593 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005594 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005595 a += 4;
5596 len -= 4;
5597 }
5598 if (len == 0) return;
5599
5600 if (len >= 2)
5601 tl_assert(aligned16(a));
5602 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005603 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005604 a += 2;
5605 len -= 2;
5606 }
5607 if (len == 0) return;
5608
5609 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005610 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005611 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005612 len -= 1;
5613 }
5614 tl_assert(len == 0);
5615}
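
/* The alignment ladder above carves an arbitrary (a, len) range into
   at most one 1-, 2- and 4-byte head, a run of aligned 8-byte chunks,
   and a 4-, 2-, 1-byte tail.  A sketch that just prints the carving,
   for checking the logic in isolation: */
#if 0
#include <stdio.h>
static void show_ladder ( unsigned long a, unsigned long len )
{
   if ((a & 1) && len >= 1) { printf("head08 %#lx\n", a); a += 1; len -= 1; }
   if ((a & 3) && len >= 2) { printf("head16 %#lx\n", a); a += 2; len -= 2; }
   if ((a & 7) && len >= 4) { printf("head32 %#lx\n", a); a += 4; len -= 4; }
   while (len >= 8)         { printf("body64 %#lx\n", a); a += 8; len -= 8; }
   if (len >= 4)            { printf("tail32 %#lx\n", a); a += 4; len -= 4; }
   if (len >= 2)            { printf("tail16 %#lx\n", a); a += 2; len -= 2; }
   if (len >= 1)            { printf("tail08 %#lx\n", a); }
}
#endif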
5616
5617
sewardj23f12002009-07-24 08:45:08 +00005618/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005619 for larger ranges, try to operate directly on the out-of-cache
5620 representation, rather than dragging lines into the cache,
5621 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005622 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005623
sewardj23f12002009-07-24 08:45:08 +00005624 Note that this doesn't change the filtering arrangements. The
5625 caller of zsm_sset_range needs to attend to that. */
5626
5627static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005628{
5629 tl_assert(svNew != SVal_INVALID);
5630 stats__cache_make_New_arange += (ULong)len;
5631
5632 if (0 && len > 500)
5633 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5634
5635 if (0) {
5636 static UWord n_New_in_cache = 0;
5637 static UWord n_New_not_in_cache = 0;
5638 /* tag is 'a' with the in-line offset masked out,
5639 eg a[31]..a[4] 0000 */
5640 Addr tag = a & ~(N_LINE_ARANGE - 1);
5641 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5642 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5643 n_New_in_cache++;
5644 } else {
5645 n_New_not_in_cache++;
5646 }
5647 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5648 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5649 n_New_in_cache, n_New_not_in_cache );
5650 }
5651
5652 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005653 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005654 } else {
5655 Addr before_start = a;
5656 Addr aligned_start = cacheline_ROUNDUP(a);
5657 Addr after_start = cacheline_ROUNDDN(a + len);
5658 UWord before_len = aligned_start - before_start;
5659 UWord aligned_len = after_start - aligned_start;
5660 UWord after_len = a + len - after_start;
5661 tl_assert(before_start <= aligned_start);
5662 tl_assert(aligned_start <= after_start);
5663 tl_assert(before_len < N_LINE_ARANGE);
5664 tl_assert(after_len < N_LINE_ARANGE);
5665 tl_assert(get_cacheline_offset(aligned_start) == 0);
5666 if (get_cacheline_offset(a) == 0) {
5667 tl_assert(before_len == 0);
5668 tl_assert(a == aligned_start);
5669 }
5670 if (get_cacheline_offset(a+len) == 0) {
5671 tl_assert(after_len == 0);
5672 tl_assert(after_start == a+len);
5673 }
5674 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005675 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005676 }
5677 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005678 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005679 }
5680 stats__cache_make_New_inZrep += (ULong)aligned_len;
5681
5682 while (1) {
5683 Addr tag;
5684 UWord wix;
5685 if (aligned_start >= after_start)
5686 break;
5687 tl_assert(get_cacheline_offset(aligned_start) == 0);
5688 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5689 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5690 if (tag == cache_shmem.tags0[wix]) {
5691 UWord i;
5692 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005693 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005694 } else {
5695 UWord i;
5696 Word zix;
5697 SecMap* sm;
5698 LineZ* lineZ;
5699 /* This line is not in the cache. Do not force it in; instead
5700 modify it in-place. */
5701 /* find the Z line to write in and rcdec it or the
5702 associated F line. */
5703 find_Z_for_writing( &sm, &zix, tag );
5704 tl_assert(sm);
5705 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5706 lineZ = &sm->linesZ[zix];
5707 lineZ->dict[0] = svNew;
5708 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5709 for (i = 0; i < N_LINE_ARANGE/4; i++)
5710 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5711 rcinc_LineZ(lineZ);
5712 }
5713 aligned_start += N_LINE_ARANGE;
5714 aligned_len -= N_LINE_ARANGE;
5715 }
5716 tl_assert(aligned_start == after_start);
5717 tl_assert(aligned_len == 0);
5718 }
5719}
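
/* The large-range path above splits [a, a+len) at line boundaries:
   an unaligned head and tail go through the in-cache SMALL path,
   while the aligned middle is written directly into the Z
   representation.  A sketch of just that arithmetic, assuming
   64-byte lines and len >= 2 lines, as the caller guarantees: */
#if 0
#define EX_LINE 64UL                    /* stand-in for N_LINE_ARANGE */
static void ex_split_range ( unsigned long a, unsigned long len )
{
   unsigned long aligned_start = (a + EX_LINE - 1) & ~(EX_LINE - 1);
   unsigned long after_start   = (a + len)         & ~(EX_LINE - 1);
   unsigned long before_len    = aligned_start - a;
   unsigned long aligned_len   = after_start - aligned_start;
   unsigned long after_len     = a + len - after_start;
   /* before_len, after_len < EX_LINE; aligned_len is a whole number
      of lines and is processed line-at-a-time. */
   (void)before_len; (void)aligned_len; (void)after_len;
}
#endif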
5720
5721
5722/////////////////////////////////////////////////////////
5723// //
sewardj23f12002009-07-24 08:45:08 +00005724// Front-filtering accesses //
5725// //
5726/////////////////////////////////////////////////////////
5727
5728static UWord stats__f_ac = 0;
5729static UWord stats__f_sk = 0;
5730
5731#if 0
5732# define STATS__F_SHOW \
5733 do { \
5734 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5735 VG_(printf)("filters: ac %lu sk %lu\n", \
5736 stats__f_ac, stats__f_sk); \
5737 } while (0)
5738#else
5739# define STATS__F_SHOW /* */
5740#endif
5741
5742void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5743 stats__f_ac++;
5744 STATS__F_SHOW;
5745 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5746 stats__f_sk++;
5747 return;
5748 }
5749 zsm_sapply08__msmcwrite(thr, a);
5750}
5751
5752void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5753 stats__f_ac++;
5754 STATS__F_SHOW;
5755 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5756 stats__f_sk++;
5757 return;
5758 }
5759 zsm_sapply16__msmcwrite(thr, a);
5760}
5761
5762void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5763 stats__f_ac++;
5764 STATS__F_SHOW;
5765 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5766 stats__f_sk++;
5767 return;
5768 }
5769 zsm_sapply32__msmcwrite(thr, a);
5770}
5771
5772void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5773 stats__f_ac++;
5774 STATS__F_SHOW;
5775 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5776 stats__f_sk++;
5777 return;
5778 }
5779 zsm_sapply64__msmcwrite(thr, a);
5780}
5781
5782void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5783{
5784 /* fast track a couple of common cases */
5785 if (len == 4 && aligned32(a)) {
5786 zsm_sapply32_f__msmcwrite( thr, a );
5787 return;
5788 }
5789 if (len == 8 && aligned64(a)) {
5790 zsm_sapply64_f__msmcwrite( thr, a );
5791 return;
5792 }
5793
5794 /* be completely general (but as efficient as possible) */
5795 if (len == 0) return;
5796
5797 if (!aligned16(a) && len >= 1) {
5798 zsm_sapply08_f__msmcwrite( thr, a );
5799 a += 1;
5800 len -= 1;
5801 tl_assert(aligned16(a));
5802 }
5803 if (len == 0) return;
5804
5805 if (!aligned32(a) && len >= 2) {
5806 zsm_sapply16_f__msmcwrite( thr, a );
5807 a += 2;
5808 len -= 2;
5809 tl_assert(aligned32(a));
5810 }
5811 if (len == 0) return;
5812
5813 if (!aligned64(a) && len >= 4) {
5814 zsm_sapply32_f__msmcwrite( thr, a );
5815 a += 4;
5816 len -= 4;
5817 tl_assert(aligned64(a));
5818 }
5819 if (len == 0) return;
5820
5821 if (len >= 8) {
5822 tl_assert(aligned64(a));
5823 while (len >= 8) {
5824 zsm_sapply64_f__msmcwrite( thr, a );
5825 a += 8;
5826 len -= 8;
5827 }
5828 tl_assert(aligned64(a));
5829 }
5830 if (len == 0) return;
5831
5832 if (len >= 4)
5833 tl_assert(aligned32(a));
5834 if (len >= 4) {
5835 zsm_sapply32_f__msmcwrite( thr, a );
5836 a += 4;
5837 len -= 4;
5838 }
5839 if (len == 0) return;
5840
5841 if (len >= 2)
5842 tl_assert(aligned16(a));
5843 if (len >= 2) {
5844 zsm_sapply16_f__msmcwrite( thr, a );
5845 a += 2;
5846 len -= 2;
5847 }
5848 if (len == 0) return;
5849
5850 if (len >= 1) {
5851 zsm_sapply08_f__msmcwrite( thr, a );
5852 //a += 1;
5853 len -= 1;
5854 }
5855 tl_assert(len == 0);
5856}
5857
5858void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5859 stats__f_ac++;
5860 STATS__F_SHOW;
5861 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5862 stats__f_sk++;
5863 return;
5864 }
5865 zsm_sapply08__msmcread(thr, a);
5866}
5867
5868void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5869 stats__f_ac++;
5870 STATS__F_SHOW;
5871 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5872 stats__f_sk++;
5873 return;
5874 }
5875 zsm_sapply16__msmcread(thr, a);
5876}
5877
5878void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5879 stats__f_ac++;
5880 STATS__F_SHOW;
5881 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5882 stats__f_sk++;
5883 return;
5884 }
5885 zsm_sapply32__msmcread(thr, a);
5886}
5887
5888void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5889 stats__f_ac++;
5890 STATS__F_SHOW;
5891 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5892 stats__f_sk++;
5893 return;
5894 }
5895 zsm_sapply64__msmcread(thr, a);
5896}
5897
5898void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5899{
5900 /* fast track a couple of common cases */
5901 if (len == 4 && aligned32(a)) {
5902 zsm_sapply32_f__msmcread( thr, a );
5903 return;
5904 }
5905 if (len == 8 && aligned64(a)) {
5906 zsm_sapply64_f__msmcread( thr, a );
5907 return;
5908 }
5909
5910 /* be completely general (but as efficient as possible) */
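   /* Same alignment ladder as in zsm_sapplyNN_f__msmcwrite above. */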
5911 if (len == 0) return;
5912
5913 if (!aligned16(a) && len >= 1) {
5914 zsm_sapply08_f__msmcread( thr, a );
5915 a += 1;
5916 len -= 1;
5917 tl_assert(aligned16(a));
5918 }
5919 if (len == 0) return;
5920
5921 if (!aligned32(a) && len >= 2) {
5922 zsm_sapply16_f__msmcread( thr, a );
5923 a += 2;
5924 len -= 2;
5925 tl_assert(aligned32(a));
5926 }
5927 if (len == 0) return;
5928
5929 if (!aligned64(a) && len >= 4) {
5930 zsm_sapply32_f__msmcread( thr, a );
5931 a += 4;
5932 len -= 4;
5933 tl_assert(aligned64(a));
5934 }
5935 if (len == 0) return;
5936
5937 if (len >= 8) {
5938 tl_assert(aligned64(a));
5939 while (len >= 8) {
5940 zsm_sapply64_f__msmcread( thr, a );
5941 a += 8;
5942 len -= 8;
5943 }
5944 tl_assert(aligned64(a));
5945 }
5946 if (len == 0) return;
5947
5948 if (len >= 4)
5949 tl_assert(aligned32(a));
5950 if (len >= 4) {
5951 zsm_sapply32_f__msmcread( thr, a );
5952 a += 4;
5953 len -= 4;
5954 }
5955 if (len == 0) return;
5956
5957 if (len >= 2)
5958 tl_assert(aligned16(a));
5959 if (len >= 2) {
5960 zsm_sapply16_f__msmcread( thr, a );
5961 a += 2;
5962 len -= 2;
5963 }
5964 if (len == 0) return;
5965
5966 if (len >= 1) {
5967 zsm_sapply08_f__msmcread( thr, a );
5968 //a += 1;
5969 len -= 1;
5970 }
5971 tl_assert(len == 0);
5972}
5973
5974void libhb_Thr_resumes ( Thr* thr )
5975{
5976 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00005977 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00005978 tl_assert(!thr->llexit_done);
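   /* While this thread was descheduled, other threads may have
      accessed locations covered by its filter, so any cached
      ok-to-skip decisions are stale and must be discarded. */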
sewardj23f12002009-07-24 08:45:08 +00005979 Filter__clear(thr->filter, "libhb_Thr_resumes");
5980 /* A kludge, but .. if this thread doesn't have any marker stacks
5981 at all, get one right now. This is easier than figuring out
5982      exactly when during thread startup we can and can't take a stack
5983 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00005984 if (HG_(clo_history_level) == 1) {
5985 tl_assert(thr->local_Kws_n_stacks);
5986 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5987 note_local_Kw_n_stack_for(thr);
5988 }
sewardj23f12002009-07-24 08:45:08 +00005989}
5990
5991
5992/////////////////////////////////////////////////////////
5993// //
sewardjf98e1c02008-10-25 16:22:41 +00005994// Synchronisation objects //
5995// //
5996/////////////////////////////////////////////////////////
5997
sewardjffce8152011-06-24 10:09:41 +00005998/* A doubly-linked list of all the SOs. */
5999SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006000
sewardjffce8152011-06-24 10:09:41 +00006001static SO* SO__Alloc ( void )
6002{
sewardjf98e1c02008-10-25 16:22:41 +00006003 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6004 so->viR = VtsID_INVALID;
6005 so->viW = VtsID_INVALID;
6006 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006007   /* Add at the head of the doubly-linked list */
6008 if (admin_SO) {
6009 tl_assert(admin_SO->admin_prev == NULL);
6010 admin_SO->admin_prev = so;
6011 so->admin_next = admin_SO;
6012 } else {
6013 so->admin_next = NULL;
6014 }
6015 so->admin_prev = NULL;
6016 admin_SO = so;
6017 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006018 return so;
6019}
sewardjffce8152011-06-24 10:09:41 +00006020
6021static void SO__Dealloc ( SO* so )
6022{
sewardjf98e1c02008-10-25 16:22:41 +00006023 tl_assert(so);
6024 tl_assert(so->magic == SO_MAGIC);
6025 if (so->viR == VtsID_INVALID) {
6026 tl_assert(so->viW == VtsID_INVALID);
6027 } else {
6028 tl_assert(so->viW != VtsID_INVALID);
6029 VtsID__rcdec(so->viR);
6030 VtsID__rcdec(so->viW);
6031 }
6032 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006033   /* Unlink from the doubly-linked list */
6034 if (so->admin_prev)
6035 so->admin_prev->admin_next = so->admin_next;
6036 if (so->admin_next)
6037 so->admin_next->admin_prev = so->admin_prev;
6038 if (so == admin_SO)
6039 admin_SO = so->admin_next;
6040 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006041 HG_(free)( so );
6042}
6043
6044
6045/////////////////////////////////////////////////////////
6046// //
6047// Top Level API //
6048// //
6049/////////////////////////////////////////////////////////
6050
florian6bd9dc12012-11-23 16:17:43 +00006051static void show_thread_state ( const HChar* str, Thr* t )
sewardjf98e1c02008-10-25 16:22:41 +00006052{
6053   if (1) return; /* change "1" to "0" to enable this debug printout */
6054 if (t->viR == t->viW) {
6055 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6056 VtsID__pp( t->viR );
6057 VG_(printf)("%s","\n");
6058 } else {
6059 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6060 VtsID__pp( t->viR );
6061 VG_(printf)(" viW %u==", t->viW);
6062 VtsID__pp( t->viW );
6063 VG_(printf)("%s","\n");
6064 }
6065}
6066
6067
6068Thr* libhb_init (
6069 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006070 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006071 )
6072{
6073 Thr* thr;
6074 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006075
6076   // We will have to store a large number of these,
6077 // so make sure they're the size we expect them to be.
6078 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006079
6080   /* because the first 1024 thread IDs are unusable */
6081 tl_assert(SCALARTS_N_THRBITS >= 11);
6082 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6083 Thr_n_RCEC). */
6084 tl_assert(SCALARTS_N_THRBITS <= 29);
6085
6086 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6087 (32-bit). It's not correctness-critical, but there are a lot of
6088 them, so it's important from a space viewpoint. Unfortunately
6089 we simply can't pack it into 2 words on a 32-bit target. */
6090 if (sizeof(UWord) == 8) {
6091 tl_assert(sizeof(Thr_n_RCEC) == 16);
6092 } else {
6093 tl_assert(sizeof(Thr_n_RCEC) == 12);
6094 }
6095
6096 /* Word sets really are 32 bits. Even on a 64 bit target. */
6097 tl_assert(sizeof(WordSetID) == 4);
6098 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006099
sewardjf98e1c02008-10-25 16:22:41 +00006100 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006101 tl_assert(get_EC);
6102 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006103 main_get_EC = get_EC;
6104
6105 // No need to initialise hg_wordfm.
6106 // No need to initialise hg_wordset.
6107
sewardj7aa38a92011-02-27 23:04:12 +00006108 /* Allocated once and never deallocated. Used as a temporary in
6109 VTS singleton, tick and join operations. */
6110 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6111 temp_max_sized_VTS->id = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00006112 verydead_thread_table_init();
sewardjf98e1c02008-10-25 16:22:41 +00006113 vts_set_init();
6114 vts_tab_init();
6115 event_map_init();
6116 VtsID__invalidate_caches();
6117
6118 // initialise shadow memory
6119 zsm_init( SVal__rcinc, SVal__rcdec );
6120
6121 thr = Thr__new();
6122 vi = VtsID__mk_Singleton( thr, 1 );
6123 thr->viR = vi;
6124 thr->viW = vi;
6125 VtsID__rcinc(thr->viR);
6126 VtsID__rcinc(thr->viW);
6127
6128 show_thread_state(" root", thr);
6129 return thr;
6130}
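
/* A minimal sketch (disabled; illustrative only) of how a tool might
   initialise the library.  'my_get_stacktrace' and 'my_get_EC' are
   hypothetical callbacks supplied by the tool, not part of this
   file. */
#if 0
static void my_get_stacktrace ( Thr* thr, Addr* frames, UWord nRequest );
static ExeContext* my_get_EC ( Thr* thr );

static Thr* start_libhb ( void )
{
   /* The returned Thr* represents the root thread. */
   return libhb_init( my_get_stacktrace, my_get_EC );
}
#endif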
6131
sewardj23f12002009-07-24 08:45:08 +00006132
sewardjf98e1c02008-10-25 16:22:41 +00006133Thr* libhb_create ( Thr* parent )
6134{
6135 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6136 the child's index. Since the child's index is guaranteed
6137 unique, it has never been seen before, so the implicit value
6138 before the tick is zero and after that is one. */
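   /* Illustrative example: if the parent's clocks are {parent:3}, the
      child starts with {parent:3, child:1}, so everything the parent
      did before the fork happens-before the child's accesses; the
      parent itself is then ticked to {parent:4} below. */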
6139 Thr* child = Thr__new();
6140
6141 child->viR = VtsID__tick( parent->viR, child );
6142 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006143 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006144 VtsID__rcinc(child->viR);
6145 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006146 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006147 early for that - it may not have a valid TId yet. So, let
6148 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006149
6150 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6151 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6152
6153 /* and the parent has to move along too */
6154 VtsID__rcdec(parent->viR);
6155 VtsID__rcdec(parent->viW);
6156 parent->viR = VtsID__tick( parent->viR, parent );
6157 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006158 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006159 VtsID__rcinc(parent->viR);
6160 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006161 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006162
6163 show_thread_state(" child", child);
6164 show_thread_state("parent", parent);
6165
6166 return child;
6167}
6168
6169/* Shut down the library, and print stats (in fact that's _all_
6170   this is for). */
6171void libhb_shutdown ( Bool show_stats )
6172{
6173 if (show_stats) {
6174 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6175 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6176 stats__secmaps_allocd,
6177 stats__secmap_ga_space_covered);
6178 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6179 stats__secmap_linesZ_allocd,
6180 stats__secmap_linesZ_bytes);
6181 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6182 stats__secmap_linesF_allocd,
6183 stats__secmap_linesF_bytes);
6184 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
6185 stats__secmap_iterator_steppings);
6186 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6187 stats__secmaps_search, stats__secmaps_search_slow);
6188
6189 VG_(printf)("%s","\n");
6190 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6191 stats__cache_totrefs, stats__cache_totmisses );
6192 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6193 stats__cache_Z_fetches, stats__cache_F_fetches );
6194 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6195 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6196 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
6197 stats__cache_invals, stats__cache_flushes );
6198 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6199 stats__cache_make_New_arange,
6200 stats__cache_make_New_inZrep);
6201
6202 VG_(printf)("%s","\n");
6203 VG_(printf)(" cline: %'10lu normalises\n",
6204 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006205 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6206 stats__cline_cread64s,
6207 stats__cline_cread32s,
6208 stats__cline_cread16s,
6209 stats__cline_cread08s );
6210 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6211 stats__cline_cwrite64s,
6212 stats__cline_cwrite32s,
6213 stats__cline_cwrite16s,
6214 stats__cline_cwrite08s );
6215 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6216 stats__cline_swrite64s,
6217 stats__cline_swrite32s,
6218 stats__cline_swrite16s,
6219 stats__cline_swrite08s );
6220 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6221 stats__cline_sread08s, stats__cline_scopy08s );
sewardjf98e1c02008-10-25 16:22:41 +00006222 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6223 stats__cline_64to32splits,
6224 stats__cline_32to16splits,
6225 stats__cline_16to8splits );
6226 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6227 stats__cline_64to32pulldown,
6228 stats__cline_32to16pulldown,
6229 stats__cline_16to8pulldown );
6230 if (0)
6231 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
6232 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
6233
6234 VG_(printf)("%s","\n");
6235
sewardjc8028ad2010-05-05 09:34:42 +00006236 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006237 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006238 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006239 stats__msmcwrite, stats__msmcwrite_change);
6240 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6241 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006242 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6243 stats__join2_queries, stats__join2_misses);
6244
6245 VG_(printf)("%s","\n");
sewardjc8028ad2010-05-05 09:34:42 +00006246 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6247 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6248 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6249 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
sewardj7aa38a92011-02-27 23:04:12 +00006250 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
6251 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006252 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6253 stats__vts__indexat_slow );
6254
6255 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006256 VG_(printf)(
6257 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6258 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6259 );
6260 VG_(printf)( " libhb: %lu entries in vts_set\n",
6261 VG_(sizeFM)( vts_set ) );
6262
6263 VG_(printf)("%s","\n");
6264 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6265 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6266 stats__ctxt_rcdec2,
6267 stats__ctxt_rcdec3 );
6268 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6269 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
6270 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
6271 (UWord)N_RCEC_TAB,
6272 stats__ctxt_tab_curr );
6273 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6274 stats__ctxt_tab_qs,
6275 stats__ctxt_tab_cmps );
6276#if 0
6277 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6278 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6279 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6280 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6281 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6282 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6283 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6284 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6285 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6286 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6287 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6288 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6289 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6290 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6291
6292 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6293 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6294 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6295 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6296#endif
6297
6298 VG_(printf)("%s","<<< END libhb stats >>>\n");
6299 VG_(printf)("%s","\n");
6300
6301 }
6302}
6303
sewardjffce8152011-06-24 10:09:41 +00006304/* Receive notification that a thread has low-level exited.  The
6305 significance here is that we do not expect to see any more memory
6306 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006307void libhb_async_exit ( Thr* thr )
6308{
sewardj23f12002009-07-24 08:45:08 +00006309 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006310 tl_assert(!thr->llexit_done);
6311 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006312
6313 /* free up Filter and local_Kws_n_stacks (well, actually not the
6314 latter ..) */
6315 tl_assert(thr->filter);
6316 HG_(free)(thr->filter);
6317 thr->filter = NULL;
6318
sewardjffce8152011-06-24 10:09:41 +00006319 /* Tell the VTS mechanism this thread has exited, so it can
6320 participate in VTS pruning. Note this can only happen if the
6321 thread has both ll_exited and has been joined with. */
6322 if (thr->joinedwith_done)
6323 VTS__declare_thread_very_dead(thr);
6324
sewardj2d2ea2f2009-08-02 10:15:07 +00006325 /* Another space-accuracy tradeoff. Do we want to be able to show
6326 H1 history for conflicts in threads which have since exited? If
6327 yes, then we better not free up thr->local_Kws_n_stacks. The
6328 downside is a potential per-thread leak of up to
6329 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6330 XArray average overcommit factor is (1.5 I'd guess). */
6331 // hence:
6332 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6333 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006334}
6335
sewardjffce8152011-06-24 10:09:41 +00006336/* Receive notification that a thread has been joined with. The
6337 significance here is that we do not expect to see any further
6338 references to its vector clocks (Thr::viR and Thr::viW). */
6339void libhb_joinedwith_done ( Thr* thr )
6340{
6341 tl_assert(thr);
6342 /* Caller must ensure that this is only ever called once per Thr. */
6343 tl_assert(!thr->joinedwith_done);
6344 thr->joinedwith_done = True;
6345 if (thr->llexit_done)
6346 VTS__declare_thread_very_dead(thr);
6347}
6348
6349
sewardjf98e1c02008-10-25 16:22:41 +00006350/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6351 a Seg that points at a VTS is its one-and-only owner, and ditto for
6352 a SO that points at a VTS. */
6353
6354SO* libhb_so_alloc ( void )
6355{
6356 return SO__Alloc();
6357}
6358
6359void libhb_so_dealloc ( SO* so )
6360{
6361 tl_assert(so);
6362 tl_assert(so->magic == SO_MAGIC);
6363 SO__Dealloc(so);
6364}
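
/* A minimal sketch (disabled; illustrative only) of creating a
   happens-before edge from one thread to another with the SO API.
   'sender' and 'receiver' are assumed to be valid Thr*s obtained
   from libhb_init/libhb_create. */
#if 0
static void example_hb_edge ( Thr* sender, Thr* receiver )
{
   SO* so = libhb_so_alloc();
   /* in the sending thread, at e.g. an unlock or signal: */
   libhb_so_send( sender, so, True/*strong_send*/ );
   /* later, in the receiving thread, at the matching lock/wait: */
   libhb_so_recv( receiver, so, True/*strong_recv*/ );
   libhb_so_dealloc( so );
}
#endif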
6365
6366/* See comments in libhb.h for details on the meaning of
6367 strong vs weak sends and strong vs weak receives. */
6368void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6369{
6370 /* Copy the VTSs from 'thr' into the sync object, and then move
6371 the thread along one step. */
6372
6373 tl_assert(so);
6374 tl_assert(so->magic == SO_MAGIC);
6375
6376 /* stay sane .. a thread's read-clock must always lead or be the
6377 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006378 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6379 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006380 }
6381
6382 /* since we're overwriting the VtsIDs in the SO, we need to drop
6383 any references made by the previous contents thereof */
6384 if (so->viR == VtsID_INVALID) {
6385 tl_assert(so->viW == VtsID_INVALID);
6386 so->viR = thr->viR;
6387 so->viW = thr->viW;
6388 VtsID__rcinc(so->viR);
6389 VtsID__rcinc(so->viW);
6390 } else {
6391 /* In a strong send, we dump any previous VC in the SO and
6392 install the sending thread's VC instead. For a weak send we
6393 must join2 with what's already there. */
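      /* Illustrative: if so->viR is {T1:5} and thr->viR is {T2:7}, a
         strong send leaves the SO holding {T2:7}, whereas a weak send
         leaves it holding the join {T1:5, T2:7}. */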
6394 tl_assert(so->viW != VtsID_INVALID);
6395 VtsID__rcdec(so->viR);
6396 VtsID__rcdec(so->viW);
6397 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6398 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6399 VtsID__rcinc(so->viR);
6400 VtsID__rcinc(so->viW);
6401 }
6402
6403 /* move both parent clocks along */
6404 VtsID__rcdec(thr->viR);
6405 VtsID__rcdec(thr->viW);
6406 thr->viR = VtsID__tick( thr->viR, thr );
6407 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006408 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006409 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006410 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006411 }
sewardjf98e1c02008-10-25 16:22:41 +00006412 VtsID__rcinc(thr->viR);
6413 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006414
sewardjf98e1c02008-10-25 16:22:41 +00006415 if (strong_send)
6416 show_thread_state("s-send", thr);
6417 else
6418 show_thread_state("w-send", thr);
6419}
6420
6421void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6422{
6423 tl_assert(so);
6424 tl_assert(so->magic == SO_MAGIC);
6425
6426 if (so->viR != VtsID_INVALID) {
6427 tl_assert(so->viW != VtsID_INVALID);
6428
6429 /* Weak receive (basically, an R-acquisition of a R-W lock).
6430 This advances the read-clock of the receiver, but not the
6431 write-clock. */
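      /* Illustrative: if thr->viR is {T1:4} and so->viR is {T2:9},
         the receiver's read-clock becomes the join {T1:4, T2:9}. */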
6432 VtsID__rcdec(thr->viR);
6433 thr->viR = VtsID__join2( thr->viR, so->viR );
6434 VtsID__rcinc(thr->viR);
6435
sewardj90eb22e2009-07-28 20:22:18 +00006436 /* At one point (r10589) it seemed safest to tick the clocks for
6437 the receiving thread after the join. But on reflection, I
6438 wonder if that might cause it to 'overtake' constraints,
6439 which could lead to missing races. So, back out that part of
6440 r10589. */
6441 //VtsID__rcdec(thr->viR);
6442 //thr->viR = VtsID__tick( thr->viR, thr );
6443 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006444
sewardjf98e1c02008-10-25 16:22:41 +00006445 /* For a strong receive, we also advance the receiver's write
6446 clock, which means the receive as a whole is essentially
6447 equivalent to a W-acquisition of a R-W lock. */
6448 if (strong_recv) {
6449 VtsID__rcdec(thr->viW);
6450 thr->viW = VtsID__join2( thr->viW, so->viW );
6451 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006452
sewardj90eb22e2009-07-28 20:22:18 +00006453 /* See comment just above, re r10589. */
6454 //VtsID__rcdec(thr->viW);
6455 //thr->viW = VtsID__tick( thr->viW, thr );
6456 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006457 }
6458
sewardjf4845dc2010-05-28 20:09:59 +00006459 if (thr->filter)
6460 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006461 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006462
sewardjf98e1c02008-10-25 16:22:41 +00006463 if (strong_recv)
6464 show_thread_state("s-recv", thr);
6465 else
6466 show_thread_state("w-recv", thr);
6467
6468 } else {
6469 tl_assert(so->viW == VtsID_INVALID);
6470 /* Deal with degenerate case: 'so' has no vts, so there has been
6471 no message posted to it. Just ignore this case. */
6472 show_thread_state("d-recv", thr);
6473 }
6474}
6475
6476Bool libhb_so_everSent ( SO* so )
6477{
6478 if (so->viR == VtsID_INVALID) {
6479 tl_assert(so->viW == VtsID_INVALID);
6480 return False;
6481 } else {
6482 tl_assert(so->viW != VtsID_INVALID);
6483 return True;
6484 }
6485}
6486
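/* Debug aid: set XXX1 and/or XXX2 to a shadow-memory address of
   interest; TRACEME then matches accesses touching that address and
   trace() dumps the shadow value and thread state. */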
6487#define XXX1 0 // 0x67a106c
6488#define XXX2 0
6489
sewardj23f12002009-07-24 08:45:08 +00006490static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006491 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6492 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6493 return False;
6494}
florian6bd9dc12012-11-23 16:17:43 +00006495static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
florian6bf37262012-10-21 03:23:36 +00006496{
sewardj23f12002009-07-24 08:45:08 +00006497 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006498 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6499 show_thread_state("", thr);
6500 VG_(printf)("%s","\n");
6501}
6502
sewardj23f12002009-07-24 08:45:08 +00006503void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006504{
6505 SVal sv = SVal__mkC(thr->viW, thr->viW);
6506 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006507 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6508 zsm_sset_range( a, szB, sv );
6509 Filter__clear_range( thr->filter, a, szB );
6510 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006511}
6512
sewardjfd35d492011-03-17 19:39:55 +00006513void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006514{
sewardj23f12002009-07-24 08:45:08 +00006515 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006516}
6517
sewardjfd35d492011-03-17 19:39:55 +00006518void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6519{
6520 /* This really does put the requested range in NoAccess. It's
6521 expensive though. */
6522 SVal sv = SVal_NOACCESS;
6523 tl_assert(is_sane_SVal_C(sv));
6524 zsm_sset_range( a, szB, sv );
6525 Filter__clear_range( thr->filter, a, szB );
6526}
6527
sewardj406bac82010-03-03 23:03:40 +00006528void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6529{
6530 SVal sv = SVal_NOACCESS;
6531 tl_assert(is_sane_SVal_C(sv));
6532 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6533 zsm_sset_range( a, szB, sv );
6534 Filter__clear_range( thr->filter, a, szB );
6535 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6536}
6537
sewardj0b20a152011-03-10 21:34:21 +00006538Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00006539 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00006540 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006541}
6542
sewardj0b20a152011-03-10 21:34:21 +00006543void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00006544 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00006545 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006546}
6547
sewardj23f12002009-07-24 08:45:08 +00006548void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00006549{
sewardj23f12002009-07-24 08:45:08 +00006550 zsm_scopy_range(src, dst, len);
6551 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00006552}
6553
6554void libhb_maybe_GC ( void )
6555{
6556 event_map_maybe_GC();
6557 /* If there are still freelist entries available, no need for a
6558 GC. */
6559 if (vts_tab_freelist != VtsID_INVALID)
6560 return;
6561 /* So all the table entries are full, and we're having to expand
6562      the table. But did we hit the threshold point yet? */
6563 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6564 return;
6565 vts_tab__do_GC( False/*don't show stats*/ );
6566}
6567
6568
6569/////////////////////////////////////////////////////////////////
6570/////////////////////////////////////////////////////////////////
6571// //
6572// SECTION END main library //
6573// //
6574/////////////////////////////////////////////////////////////////
6575/////////////////////////////////////////////////////////////////
6576
6577/*--------------------------------------------------------------------*/
6578/*--- end libhb_main.c ---*/
6579/*--------------------------------------------------------------------*/