
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2010 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Debugging #defines                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: VtsID                       //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
   bits, since they have to be packed into the lowest 30 bits of an
   SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: SVal                        //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

typedef ULong SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: ScalarTS                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Scalar Timestamp.  We have to store a lot of these, so there is
   some effort to make them as small as possible.  Logically they are
   a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
   We pack it into 64 bits by representing the Thr* using a ThrID, a
   small integer (18 bits), and a 46 bit integer for the timestamp
   number.  The 46/18 split is arbitrary, but has the effect that
   Helgrind can only handle programs that create 2^18 or fewer threads
   over their entire lifetime, and have no more than 2^46 timestamp
   ticks (synchronisation operations on the same thread).

   This doesn't seem like much of a limitation.  2^46 ticks is
   7.04e+13, and if each tick (optimistically) takes the machine 1000
   cycles to process, then the minimum time to process that many ticks
   at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
   but VTS ticks, which isn't realistic.

   NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
   32 since a ThrID is a UInt.  29 comes from the fact that
   'Thr_n_RCEC', which records information about old accesses, packs
   not only a ThrID but also 2+1 other bits (access size and
   writeness) in a UInt, hence limiting size to 32-(2+1) == 29.

   NB2: thrid values are issued upwards from 1024, and values less
   than that aren't valid.  This isn't per se necessary (any order
   will do, so long as they are unique), but it does help ensure they
   are less likely to get confused with the various other kinds of
   small-integer thread ids drifting around (eg, TId).  See also NB5.

   NB3: this probably also relies on the fact that Thr's are never
   deallocated -- they exist forever.  Hence the 1-1 mapping from
   Thr's to thrid values (set up in Thr__new) persists forever.

   NB4: temp_max_sized_VTS is allocated at startup and never freed.
   It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
   ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
   making the memory use for this go sky-high.  With
   SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
   like an OK tradeoff.  If more than 256k threads need to be
   supported, we could change SCALARTS_N_THRBITS to 20, which would
   facilitate supporting 1 million threads at the cost of 8MB storage
   for temp_max_sized_VTS.

   NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
   ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
   must never be a valid ThrID.  Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */

#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
   struct {
      ThrID thrid : SCALARTS_N_THRBITS;
      ULong tym   : SCALARTS_N_TYMBITS;
   }
   ScalarTS;

#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
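
/* Illustrative sketch (not part of the library): how the two fields
   of a ScalarTS pack into a single 64-bit word, per the 18/46 split
   above.  'example_ScalarTS_packing' is hypothetical and is not
   compiled. */
#if 0
static void example_ScalarTS_packing ( void )
{
   ScalarTS ts;
   ts.thrid = 1024;   /* the first ThrID ever issued; see NB2 */
   ts.tym   = 1;      /* one timestamp tick */
   /* Both fields share one 64-bit word, so a VTS of N ScalarTSs
      costs 8*N bytes rather than 16*N. */
   tl_assert(sizeof(ScalarTS) == 8);
   tl_assert(ts.thrid <= ThrID_MAX_VALID);
}
#endif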



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: Filter                      //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// baseline: 5, 9
#define FI_LINE_SZB_LOG2  5
#define FI_NUM_LINES_LOG2 10

#define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)

#define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)

#define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
                             & (Addr)(FI_NUM_LINES-1) )
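/* Worked example (with the settings above, FI_LINE_SZB == 32 and
   FI_NUM_LINES == 1024): for _a == 0x1234, FI_GET_TAG gives 0x1220
   (low 5 bits cleared) and FI_GET_LINENO gives
   (0x1234 >> 5) & 1023 == 145. */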


/* Within a line, each naturally aligned 8-byte chunk is mapped to a
   UShort.  Regardless of the endianness of the underlying machine,
   bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
   the highest address.

   Of each bit pair, the higher numbered bit is set if an R has been
   seen, so the actual layout is:

   15 14             ...  01 00

   R  W  for addr+7  ...  R  W  for addr+0

   So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/

/* Tags are stored separately from lines.  A tag is an Addr: the base
   address of the line. */
typedef
   struct {
      UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
   }
   FiLine;
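
/* Illustrative sketch (not part of the library, not compiled):
   extracting the R and W bits for one byte of a FiLine, following the
   layout described above.  'example_filter_bits' and its parameters
   are hypothetical. */
#if 0
static void example_filter_bits ( FiLine* line, UWord byteOff /* 0 .. 7 */ )
{
   UShort u16   = line->u16s[0];              /* covers addr+0 .. addr+7 */
   UWord  shift = 2 * byteOff;
   Bool   seenW = 0 != ((u16 >> shift) & 1);  /* W is the lower bit  */
   Bool   seenR = 0 != ((u16 >> shift) & 2);  /* R is the higher bit */
   /* hence (u16 & 0xAAAA) selects every R bit and (u16 & 0x5555)
      selects every W bit, as noted above. */
}
#endif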

typedef
   struct {
      Addr   tags[FI_NUM_LINES];
      FiLine lines[FI_NUM_LINES];
   }
   Filter;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                data decls: Thr, ULong_n_EC                  //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// Records stacks for H1 history mechanism (DRD-style)
typedef
   struct { ULong ull; ExeContext* ec; }
   ULong_n_EC;


/* How many of the above records to collect for each thread?  Older
   ones are dumped when we run out of space.  62.5k requires 1MB per
   thread, since each ULong_n_EC record is 16 bytes long.  When more
   than N_KWs_N_STACKs_PER_THREAD are present, the older half are
   deleted to make space.  Hence in the worst case we will be able to
   produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
   Kw transitions (segments in this thread).  For the current setting
   that gives a guaranteed stack for at least the last 31.25k
   segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500


struct _Thr {
   /* Current VTSs for this thread.  They change as we go along.  viR
      is the VTS to be used for reads, viW for writes.  Usually they
      are the same, but can differ when we deal with reader-writer
      locks.  It is always the case that
         VtsID__cmpLEQ(viW,viR) == True
      that is, viW must be the same as, or lagging behind, viR. */
   VtsID viR;
   VtsID viW;

   /* Is initially False, and is set to True after the thread really
      has done a low-level exit.  When True, we expect to never see
      any more memory references done by this thread. */
   Bool llexit_done;

   /* Is initially False, and is set to True after the thread has been
      joined with (reaped by some other thread).  After this point, we
      do not expect to see any uses of .viR or .viW, so it is safe to
      set them to VtsID_INVALID. */
   Bool joinedwith_done;

   /* A small integer giving a unique identity to this Thr.  See
      comments on the definition of ScalarTS for details. */
   ThrID thrid : SCALARTS_N_THRBITS;

   /* A filter that removes references for which we believe that
      msmcread/msmcwrite will not change the state, nor report a
      race. */
   Filter* filter;

   /* A pointer back to the top level Thread structure.  There is a
      1-1 mapping between Thread and Thr structures -- each Thr points
      at its corresponding Thread, and vice versa.  Really, Thr and
      Thread should be merged into a single structure. */
   Thread* hgthread;

   /* The ULongs (scalar Kws) in this accumulate in strictly
      increasing order, without duplicates.  This is important because
      we need to be able to find a given scalar Kw in this array
      later, by binary search. */
   XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     data decls: SO                          //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   struct _SO* admin_prev;
   struct _SO* admin_next;
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt  magic;
};



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Forward declarations                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;

/* misc fn and data fwds */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );

static inline Bool  SVal__isC      ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal  SVal__mkC      ( VtsID rmini, VtsID wmini );

/* A doubly-linked list of all the SO's. */
SO* admin_SO;



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//           SECTION BEGIN compressed shadow memory            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore non-pointless,
   call zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
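/* e.g. ROUNDUP(13, 8) == 16 and ROUNDDN(13, 8) == 8; values that are
   already a multiple of N are returned unchanged by both. */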



/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
   * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
   * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick            = 0; // # calls to VTS__tick
static UWord stats__vts__join            = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural

// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;

// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;

// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;

// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;


static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
      VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                  (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
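/* Note on the initialiser above: gaKey == 1 is not SecMap-base
   aligned, so it can never match a real key; hence all three entries
   initially miss.  A hit in slot 1 or 2 of shmem__find_SecMap (below)
   swaps the entry one slot closer to smCache[0], giving a cheap
   move-to-front approximation. */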

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
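
/* Worked example: each UChar in 'arr' holds four 2-bit entries, least
   significant pair first.  So after write_twobit_array(arr, 5, 3),
   entry 5 lives in bits 3:2 of arr[1], and read_twobit_array(arr, 5)
   returns 3. */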

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
   VG_(printf)("SM %p: expand F array from %d to %d\n",
               sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
   case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
      return BYTE(1,1,1,1,1,1,1,1);
   case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16  8 8 8 8 */
      return BYTE(1,1,0,1,1,1,1,1);
   case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8  8 8 8 8 */
      return BYTE(0,1,1,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16  8 8 8 8 */
      return BYTE(0,1,0,1,1,1,1,1);

   case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
      return BYTE(1,1,1,1,1,1,0,1);
   case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16  8 8 16 */
      return BYTE(1,1,0,1,1,1,0,1);
   case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8  8 8 16 */
      return BYTE(0,1,1,1,1,1,0,1);
   case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16  8 8 16 */
      return BYTE(0,1,0,1,1,1,0,1);

   case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
      return BYTE(1,1,1,1,0,1,1,1);
   case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16  16 8 8 */
      return BYTE(1,1,0,1,0,1,1,1);
   case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8  16 8 8 */
      return BYTE(0,1,1,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16  16 8 8 */
      return BYTE(0,1,0,1,0,1,1,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
      return BYTE(1,1,1,1,0,1,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16  16 16 */
      return BYTE(1,1,0,1,0,1,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8  16 16 */
      return BYTE(0,1,1,1,0,1,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16  16 16 */
      return BYTE(0,1,0,1,0,1,0,1);

   case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
      return BYTE(0,0,0,1,1,1,1,1);
   case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16 */
      return BYTE(0,0,0,1,1,1,0,1);
   case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16 8 8 */
      return BYTE(0,0,0,1,0,1,1,1);
   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16 16 */
      return BYTE(0,0,0,1,0,1,0,1);

   case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
      return BYTE(1,1,1,1,0,0,0,1);
   case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16  32 */
      return BYTE(1,1,0,1,0,0,0,1);
   case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8  32 */
      return BYTE(0,1,1,1,0,0,0,1);
   case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16  32 */
      return BYTE(0,1,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
      return BYTE(0,0,0,1,0,0,0,1);

   case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
      return BYTE(0,0,0,0,0,0,0,1);

   default: return BYTE(0,0,0,0,0,0,0,0);
      /* INVALID - any valid descr produces at least one
         valid bit in tree[0..7]*/
   }
   /* NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
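
/* Worked example: if all eight leaves tree[0..7] hold the same SVal v,
   the three merge passes above leave only tree[0] == v, set tree[1..7]
   to SVal_INVALID, and return TREE_DESCR_64: the whole 8-byte range is
   then described by a single 64-bit node. */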

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
   VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
   VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag =  cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}

static __attribute__((noinline))
           CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
           CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

1567static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1568 stats__cline_64to32pulldown++;
1569 switch (toff) {
1570 case 0: case 4:
1571 tl_assert(descr & TREE_DESCR_64);
1572 tree[4] = tree[0];
1573 descr &= ~TREE_DESCR_64;
1574 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1575 break;
1576 default:
1577 tl_assert(0);
1578 }
1579 return descr;
1580}
1581
1582static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1583 stats__cline_32to16pulldown++;
1584 switch (toff) {
1585 case 0: case 2:
1586 if (!(descr & TREE_DESCR_32_0)) {
1587 descr = pulldown_to_32(tree, 0, descr);
1588 }
1589 tl_assert(descr & TREE_DESCR_32_0);
1590 tree[2] = tree[0];
1591 descr &= ~TREE_DESCR_32_0;
1592 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1593 break;
1594 case 4: case 6:
1595 if (!(descr & TREE_DESCR_32_1)) {
1596 descr = pulldown_to_32(tree, 4, descr);
1597 }
1598 tl_assert(descr & TREE_DESCR_32_1);
1599 tree[6] = tree[4];
1600 descr &= ~TREE_DESCR_32_1;
1601 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1602 break;
1603 default:
1604 tl_assert(0);
1605 }
1606 return descr;
1607}
1608
1609static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1610 stats__cline_16to8pulldown++;
1611 switch (toff) {
1612 case 0: case 1:
1613 if (!(descr & TREE_DESCR_16_0)) {
1614 descr = pulldown_to_16(tree, 0, descr);
1615 }
1616 tl_assert(descr & TREE_DESCR_16_0);
1617 tree[1] = tree[0];
1618 descr &= ~TREE_DESCR_16_0;
1619 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1620 break;
1621 case 2: case 3:
1622 if (!(descr & TREE_DESCR_16_1)) {
1623 descr = pulldown_to_16(tree, 2, descr);
1624 }
1625 tl_assert(descr & TREE_DESCR_16_1);
1626 tree[3] = tree[2];
1627 descr &= ~TREE_DESCR_16_1;
1628 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1629 break;
1630 case 4: case 5:
1631 if (!(descr & TREE_DESCR_16_2)) {
1632 descr = pulldown_to_16(tree, 4, descr);
1633 }
1634 tl_assert(descr & TREE_DESCR_16_2);
1635 tree[5] = tree[4];
1636 descr &= ~TREE_DESCR_16_2;
1637 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1638 break;
1639 case 6: case 7:
1640 if (!(descr & TREE_DESCR_16_3)) {
1641 descr = pulldown_to_16(tree, 6, descr);
1642 }
1643 tl_assert(descr & TREE_DESCR_16_3);
1644 tree[7] = tree[6];
1645 descr &= ~TREE_DESCR_16_3;
1646 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1647 break;
1648 default:
1649 tl_assert(0);
1650 }
1651 return descr;
1652}
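
/* Illustrative only: one worked pulldown, using the TREE_DESCR_
   encoding defined earlier in this file (one descriptor bit per
   valid 8-, 16-, 32- and 64-bit node). Starting from a tree whose
   only valid value is the 64-bit root, asking for byte 3 cascades
   64 -> 32 -> 16 -> 8, copying the root's value down at each split.
   SVal_INVALID stands in for an arbitrary shadow value here. */
#if 0
static void example__pulldown ( void )
{
   SVal   tree[8];
   UShort descr = TREE_DESCR_64;
   tree[0] = SVal_INVALID; /* any value would do */
   descr = pulldown_to_8(tree, 3, descr);
   /* the root was split: 32-bit node 1, 16-bit node 0 and 8-bit
      leaves 2 and 3 are now the valid entries ... */
   tl_assert(descr == (TREE_DESCR_32_1 | TREE_DESCR_16_0
                       | TREE_DESCR_8_3 | TREE_DESCR_8_2));
   /* ... and each carries a copy of the original root value. */
   tl_assert(tree[2] == tree[0] && tree[3] == tree[0]
             && tree[4] == tree[0]);
}
#endif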
1653
1654
1655static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1656 UShort mask;
1657 switch (toff) {
1658 case 0:
1659 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1660 tl_assert( (descr & mask) == mask );
1661 descr &= ~mask;
1662 descr |= TREE_DESCR_16_0;
1663 break;
1664 case 2:
1665 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1666 tl_assert( (descr & mask) == mask );
1667 descr &= ~mask;
1668 descr |= TREE_DESCR_16_1;
1669 break;
1670 case 4:
1671 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1672 tl_assert( (descr & mask) == mask );
1673 descr &= ~mask;
1674 descr |= TREE_DESCR_16_2;
1675 break;
1676 case 6:
1677 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1678 tl_assert( (descr & mask) == mask );
1679 descr &= ~mask;
1680 descr |= TREE_DESCR_16_3;
1681 break;
1682 default:
1683 tl_assert(0);
1684 }
1685 return descr;
1686}
1687
1688static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1689 UShort mask;
1690 switch (toff) {
1691 case 0:
1692 if (!(descr & TREE_DESCR_16_0))
1693 descr = pullup_descr_to_16(descr, 0);
1694 if (!(descr & TREE_DESCR_16_1))
1695 descr = pullup_descr_to_16(descr, 2);
1696 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1697 tl_assert( (descr & mask) == mask );
1698 descr &= ~mask;
1699 descr |= TREE_DESCR_32_0;
1700 break;
1701 case 4:
1702 if (!(descr & TREE_DESCR_16_2))
1703 descr = pullup_descr_to_16(descr, 4);
1704 if (!(descr & TREE_DESCR_16_3))
1705 descr = pullup_descr_to_16(descr, 6);
1706 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1707 tl_assert( (descr & mask) == mask );
1708 descr &= ~mask;
1709 descr |= TREE_DESCR_32_1;
1710 break;
1711 default:
1712 tl_assert(0);
1713 }
1714 return descr;
1715}
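
/* Illustrative only, complementing the pulldown sketch above:
   starting from eight valid 8-bit leaves, pulling up to a 32-bit
   view of bytes 0..3 fuses leaves 0..3 in two steps (8s -> 16s ->
   32), leaving leaves 4..7 untouched. */
#if 0
static void example__pullup ( void )
{
   UShort descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
                  | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
                  | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   descr = pullup_descr_to_32(descr, 0);
   tl_assert(descr == (TREE_DESCR_32_0 | TREE_DESCR_8_7
                       | TREE_DESCR_8_6 | TREE_DESCR_8_5
                       | TREE_DESCR_8_4));
}
#endif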
1716
1717static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1718 switch (toff) {
1719 case 0: case 4:
1720 return 0 != (descr & TREE_DESCR_64);
1721 default:
1722 tl_assert(0);
1723 }
1724}
1725
1726static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1727 switch (toff) {
1728 case 0:
1729 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1730 case 2:
1731 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1732 case 4:
1733 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1734 case 6:
1735 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1736 default:
1737 tl_assert(0);
1738 }
1739}
1740
1741/* ------------ Cache management ------------ */
1742
1743static void zsm_flush_cache ( void )
1744{
1745 shmem__flush_and_invalidate_scache();
1746}
1747
1748
1749static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1750{
1751 tl_assert( sizeof(UWord) == sizeof(Addr) );
1752
1753 rcinc = p_rcinc;
1754 rcdec = p_rcdec;
1755
1756 tl_assert(map_shmem == NULL);
1757 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1758 HG_(free),
1759 NULL/*unboxed UWord cmp*/);
1760 tl_assert(map_shmem != NULL);
1761 shmem__invalidate_scache();
1762
1763 /* a SecMap must contain an integral number of CacheLines */
1764 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1765 /* also ... a CacheLine holds an integral number of trees */
1766 tl_assert(0 == (N_LINE_ARANGE % 8));
1767}
1768
1769/////////////////////////////////////////////////////////////////
1770/////////////////////////////////////////////////////////////////
1771// //
1772// SECTION END compressed shadow memory //
1773// //
1774/////////////////////////////////////////////////////////////////
1775/////////////////////////////////////////////////////////////////
1776
1777
1778
1779/////////////////////////////////////////////////////////////////
1780/////////////////////////////////////////////////////////////////
1781// //
1782// SECTION BEGIN vts primitives //
1783// //
1784/////////////////////////////////////////////////////////////////
1785/////////////////////////////////////////////////////////////////
1786
sewardjf98e1c02008-10-25 16:22:41 +00001787
sewardje4cce742011-02-24 15:25:24 +00001788/* There's a 1-1 mapping between Thrs and ThrIDs -- the latter merely
1789 being compact stand-ins for Thr*'s. Use these functions to map
1790 between them. */
1791static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
1792static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
1793
sewardje4cce742011-02-24 15:25:24 +00001794__attribute__((noreturn))
1795static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1796{
1797 if (due_to_nThrs) {
1798 HChar* s =
1799 "\n"
1800 "Helgrind: cannot continue, run aborted: too many threads.\n"
1801 "Sorry. Helgrind can only handle programs that create\n"
1802 "%'llu or fewer threads over their entire lifetime.\n"
1803 "\n";
sewardj03e7d272011-05-04 09:08:34 +00001804 VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
sewardje4cce742011-02-24 15:25:24 +00001805 } else {
1806 HChar* s =
1807 "\n"
1808 "Helgrind: cannot continue, run aborted: too many\n"
1809 "synchronisation events. Sorry. Helgrind can only handle\n"
1810 "programs which perform %'llu or fewer\n"
1811 "inter-thread synchronisation events (locks, unlocks, etc).\n"
1812 "\n";
1813 VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1814 }
1815 VG_(exit)(1);
1816 /*NOTREACHED*/
1817 tl_assert(0); /*wtf?!*/
1818}
1819
1820
sewardjffce8152011-06-24 10:09:41 +00001821/* The dead thread (ThrID, actually) table. A thread may only be
1822 listed here if we have been notified thereof by libhb_async_exit.
1823 New entries are added at the end. The order isn't important, but
1824 the ThrID values must be unique. This table lists the identity of
1825 all threads that have ever died -- none are ever removed. We keep
1826 this table so as to be able to prune entries from VTSs. We don't
1827 actually need to keep the set of threads that have ever died --
1828 only the threads that have died since the previous round of
1829 pruning. But it's useful for sanity check purposes to keep the
1830 entire set, so we do. */
1831static XArray* /* of ThrID */ verydead_thread_table = NULL;
1832
1833/* Arbitrary total ordering on ThrIDs. */
1834static Int cmp__ThrID ( void* v1, void* v2 ) {
1835 ThrID id1 = *(ThrID*)v1;
1836 ThrID id2 = *(ThrID*)v2;
1837 if (id1 < id2) return -1;
1838 if (id1 > id2) return 1;
1839 return 0;
1840}
1841
1842static void verydead_thread_table_init ( void )
1843{
1844 tl_assert(!verydead_thread_table);
1845 verydead_thread_table
1846 = VG_(newXA)( HG_(zalloc),
1847 "libhb.verydead_thread_table_init.1",
1848 HG_(free), sizeof(ThrID) );
1849 tl_assert(verydead_thread_table);
1850 VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
1851}
1852
sewardjf98e1c02008-10-25 16:22:41 +00001853
1854/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1855 a backlink for the caller's convenience. Since we have no idea
1856 what to set that to in the library, it always gets set to
1857 VtsID_INVALID. */
1858typedef
1859 struct {
sewardj7aa38a92011-02-27 23:04:12 +00001860 VtsID id;
1861 UInt usedTS;
1862 UInt sizeTS;
1863 ScalarTS ts[0];
sewardjf98e1c02008-10-25 16:22:41 +00001864 }
1865 VTS;
1866
sewardj7aa38a92011-02-27 23:04:12 +00001867/* Allocate a VTS capable of storing 'sizeTS' entries. */
1868static VTS* VTS__new ( HChar* who, UInt sizeTS );
sewardjf98e1c02008-10-25 16:22:41 +00001869
sewardjffce8152011-06-24 10:09:41 +00001870/* Make a clone of 'vts', sizing the new array to exactly match the
sewardj7aa38a92011-02-27 23:04:12 +00001871 number of ScalarTSs present. */
1872static VTS* VTS__clone ( HChar* who, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001873
sewardjffce8152011-06-24 10:09:41 +00001874/* Make a clone of 'vts' with the thrids in 'thrids' removed. The new
1875 array is sized exactly to hold the number of required elements.
1876 'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
1877 must be in strictly increasing order. */
1878static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel );
1879
sewardjf98e1c02008-10-25 16:22:41 +00001880/* Delete this VTS in its entirety. */
sewardj23f12002009-07-24 08:45:08 +00001881static void VTS__delete ( VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001882
sewardj7aa38a92011-02-27 23:04:12 +00001883/* Create a new singleton VTS in 'out'. Caller must have
1884 pre-allocated 'out' sufficiently big to hold the result in all
1885 possible cases. */
1886static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
sewardjf98e1c02008-10-25 16:22:41 +00001887
sewardj7aa38a92011-02-27 23:04:12 +00001888/* Create in 'out' a VTS which is the same as 'vts' except with
1889 vts[me]++, so to speak. Caller must have pre-allocated 'out'
1890 sufficiently big to hold the result in all possible cases. */
1891static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001892
sewardj7aa38a92011-02-27 23:04:12 +00001893/* Create in 'out' a VTS which is the join (max) of 'a' and
1894 'b'. Caller must have pre-allocated 'out' sufficiently big to hold
1895 the result in all possible cases. */
1896static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001897
sewardj23f12002009-07-24 08:45:08 +00001898/* Compute the partial ordering relation of the two args. Although we
1899 could be completely general and return an enumeration value (EQ,
1900 LT, GT, UN), in fact we only need LEQ, and so we may as well
1901 hardwire that fact.
sewardjf98e1c02008-10-25 16:22:41 +00001902
sewardje4cce742011-02-24 15:25:24 +00001903 Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
1904   invalid ThrID).  In the latter case, the returned ThrID indicates
1905   a point (thread) at which LEQ fails.  There may be more
1906 than one such point, but we only care about seeing one of them, not
1907 all of them. This rather strange convention is used because
1908 sometimes we want to know the actual index at which they first
1909 differ. */
1910static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001911
1912/* Compute an arbitrary structural (total) ordering on the two args,
1913 based on their VCs, so they can be looked up in a table, tree, etc.
1914 Returns -1, 0 or 1. */
sewardj23f12002009-07-24 08:45:08 +00001915static Word VTS__cmp_structural ( VTS* a, VTS* b );
sewardjf98e1c02008-10-25 16:22:41 +00001916
1917/* Debugging only. Display the given VTS in the buffer. */
sewardj23f12002009-07-24 08:45:08 +00001918static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
sewardjf98e1c02008-10-25 16:22:41 +00001919
1920/* Debugging only. Return vts[index], so to speak. */
sewardj23f12002009-07-24 08:45:08 +00001921static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
sewardjf98e1c02008-10-25 16:22:41 +00001922
sewardjffce8152011-06-24 10:09:41 +00001923/* Notify the VTS machinery that a thread has been declared
1924 comprehensively dead: that is, it has done an async exit AND it has
1925 been joined with. This should ensure that its local clocks (.viR
1926 and .viW) will never again change, and so all mentions of this
1927 thread from all VTSs in the system may be removed. */
1928static void VTS__declare_thread_very_dead ( Thr* thr );
sewardjf98e1c02008-10-25 16:22:41 +00001929
1930/*--------------- to do with Vector Timestamps ---------------*/
1931
sewardjf98e1c02008-10-25 16:22:41 +00001932static Bool is_sane_VTS ( VTS* vts )
1933{
1934 UWord i, n;
1935 ScalarTS *st1, *st2;
1936 if (!vts) return False;
1937 if (!vts->ts) return False;
sewardj555fc572011-02-27 23:39:53 +00001938 if (vts->usedTS > vts->sizeTS) return False;
sewardj7aa38a92011-02-27 23:04:12 +00001939 n = vts->usedTS;
1940 if (n == 1) {
1941 st1 = &vts->ts[0];
1942 if (st1->tym == 0)
1943 return False;
1944 }
1945 else
sewardjf98e1c02008-10-25 16:22:41 +00001946 if (n >= 2) {
1947 for (i = 0; i < n-1; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00001948 st1 = &vts->ts[i];
1949 st2 = &vts->ts[i+1];
sewardje4cce742011-02-24 15:25:24 +00001950 if (st1->thrid >= st2->thrid)
sewardjf98e1c02008-10-25 16:22:41 +00001951 return False;
1952 if (st1->tym == 0 || st2->tym == 0)
1953 return False;
1954 }
1955 }
1956 return True;
1957}
1958
1959
sewardj7aa38a92011-02-27 23:04:12 +00001960/* Create a new, empty VTS, capable of storing 'sizeTS' entries.
sewardjf98e1c02008-10-25 16:22:41 +00001961*/
sewardj7aa38a92011-02-27 23:04:12 +00001962static VTS* VTS__new ( HChar* who, UInt sizeTS )
sewardjf98e1c02008-10-25 16:22:41 +00001963{
sewardj7aa38a92011-02-27 23:04:12 +00001964 VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
1965 tl_assert(vts->usedTS == 0);
1966 vts->sizeTS = sizeTS;
1967 *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
sewardjf98e1c02008-10-25 16:22:41 +00001968 return vts;
1969}
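
/* Illustrative only: what VTS__new hands back. For sizeTS == 2 the
   allocation is laid out as

      | id | usedTS | sizeTS | ts[0] | ts[1] | sentinel |

   where the sentinel is the 64-bit pattern 0x0ddC0ffeeBadF00d,
   written just past the last usable entry and re-checked by the
   other VTS__ functions as a cheap overrun detector. */
#if 0
static void example__vts_new ( void )
{
   VTS* vts = VTS__new("example (not a real allocation tag)", 2);
   tl_assert(vts->usedTS == 0 && vts->sizeTS == 2);
   tl_assert( *(ULong*)(&vts->ts[2]) == 0x0ddC0ffeeBadF00dULL );
   VTS__delete(vts);
}
#endif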
1970
sewardj7aa38a92011-02-27 23:04:12 +00001971/* Clone this VTS.
1972*/
1973static VTS* VTS__clone ( HChar* who, VTS* vts )
1974{
1975 tl_assert(vts);
1976 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1977 UInt nTS = vts->usedTS;
1978 VTS* clone = VTS__new(who, nTS);
1979 clone->id = vts->id;
1980 clone->sizeTS = nTS;
1981 clone->usedTS = nTS;
1982 UInt i;
1983 for (i = 0; i < nTS; i++) {
1984 clone->ts[i] = vts->ts[i];
1985 }
1986 tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1987 return clone;
1988}
1989
sewardjf98e1c02008-10-25 16:22:41 +00001990
sewardjffce8152011-06-24 10:09:41 +00001991/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
1992 must be in strictly increasing order. We could obviously do this
1993 much more efficiently (in linear time) if necessary.
1994*/
1995static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel )
1996{
1997 UInt i, j;
1998 tl_assert(vts);
1999 tl_assert(thridsToDel);
2000 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2001 UInt nTS = vts->usedTS;
2002 /* Figure out how many ScalarTSs will remain in the output. */
2003 UInt nReq = nTS;
2004 for (i = 0; i < nTS; i++) {
2005 ThrID thrid = vts->ts[i].thrid;
2006 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2007 nReq--;
2008 }
2009 tl_assert(nReq <= nTS);
2010 /* Copy the ones that will remain. */
2011 VTS* res = VTS__new(who, nReq);
2012 j = 0;
2013 for (i = 0; i < nTS; i++) {
2014 ThrID thrid = vts->ts[i].thrid;
2015 if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2016 continue;
2017 res->ts[j++] = vts->ts[i];
2018 }
2019 tl_assert(j == nReq);
2020 tl_assert(j == res->sizeTS);
2021 res->usedTS = j;
2022 tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2023 return res;
2024}
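
/* Illustrative only: a sketch of driving VTS__subtract by hand.
   The ThrID values are made up (they merely need to be distinct
   and >= 1024, per the assertion in VTS__cmpLEQ below), and the
   VTSs are built directly rather than via the normal tick/join
   paths. */
#if 0
static void example__subtract ( void )
{
   /* the victim set { 1025 }, sorted as VTS__subtract requires */
   XArray* del = VG_(newXA)( HG_(zalloc), "example.del", HG_(free),
                             sizeof(ThrID) );
   ThrID doomed = 1025;
   VG_(setCmpFnXA)( del, cmp__ThrID );
   VG_(addToXA)( del, &doomed );
   VG_(sortXA)( del );
   /* a 3-entry VTS: (1024,3) (1025,9) (1030,1) */
   VTS* vts = VTS__new("example.vts", 3);
   vts->ts[0].thrid = 1024; vts->ts[0].tym = 3;
   vts->ts[1].thrid = 1025; vts->ts[1].tym = 9;
   vts->ts[2].thrid = 1030; vts->ts[2].tym = 1;
   vts->usedTS = 3;
   /* subtracting { 1025 } leaves (1024,3) (1030,1) */
   VTS* res = VTS__subtract("example.res", vts, del);
   tl_assert(res->usedTS == 2);
   tl_assert(res->ts[0].thrid == 1024 && res->ts[1].thrid == 1030);
   VTS__delete(res); VTS__delete(vts); VG_(deleteXA)(del);
}
#endif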
2025
2026
sewardjf98e1c02008-10-25 16:22:41 +00002027/* Delete this VTS in its entirety.
2028*/
sewardj7aa38a92011-02-27 23:04:12 +00002029static void VTS__delete ( VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002030{
2031 tl_assert(vts);
sewardj7aa38a92011-02-27 23:04:12 +00002032 tl_assert(vts->usedTS <= vts->sizeTS);
2033 tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
sewardjf98e1c02008-10-25 16:22:41 +00002034 HG_(free)(vts);
2035}
2036
2037
2038/* Create a new singleton VTS in 'out'.
2039*/
sewardj7aa38a92011-02-27 23:04:12 +00002040static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2041{
sewardjf98e1c02008-10-25 16:22:41 +00002042 tl_assert(thr);
2043 tl_assert(tym >= 1);
sewardj7aa38a92011-02-27 23:04:12 +00002044 tl_assert(out);
2045 tl_assert(out->usedTS == 0);
2046 tl_assert(out->sizeTS >= 1);
2047 UInt hi = out->usedTS++;
2048 out->ts[hi].thrid = Thr__to_ThrID(thr);
2049 out->ts[hi].tym = tym;
sewardjf98e1c02008-10-25 16:22:41 +00002050}
2051
2052
2053/* Create in 'out' a VTS which is the same as 'vts' except with
2054   vts[me]++, so to speak. 'vts' itself is not modified.
2055*/
sewardj7aa38a92011-02-27 23:04:12 +00002056static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
sewardjf98e1c02008-10-25 16:22:41 +00002057{
sewardj7aa38a92011-02-27 23:04:12 +00002058 UInt i, n;
sewardje4cce742011-02-24 15:25:24 +00002059 ThrID me_thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002060 Bool found = False;
sewardjc8028ad2010-05-05 09:34:42 +00002061
2062 stats__vts__tick++;
2063
sewardj7aa38a92011-02-27 23:04:12 +00002064 tl_assert(out);
2065 tl_assert(out->usedTS == 0);
2066 if (vts->usedTS >= ThrID_MAX_VALID)
2067 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2068 tl_assert(out->sizeTS >= 1 + vts->usedTS);
2069
sewardjf98e1c02008-10-25 16:22:41 +00002070 tl_assert(me);
sewardje4cce742011-02-24 15:25:24 +00002071 me_thrid = Thr__to_ThrID(me);
sewardjf98e1c02008-10-25 16:22:41 +00002072 tl_assert(is_sane_VTS(vts));
sewardj7aa38a92011-02-27 23:04:12 +00002073 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002074
sewardj555fc572011-02-27 23:39:53 +00002075 /* Copy all entries which precede 'me'. */
2076 for (i = 0; i < n; i++) {
2077 ScalarTS* here = &vts->ts[i];
2078 if (UNLIKELY(here->thrid >= me_thrid))
2079 break;
2080 UInt hi = out->usedTS++;
2081 out->ts[hi] = *here;
2082 }
2083
2084 /* 'i' now indicates the next entry to copy, if any.
2085 There are 3 possibilities:
2086 (a) there is no next entry (we used them all up already):
2087 add (me_thrid,1) to the output, and quit
2088 (b) there is a next entry, and its thrid > me_thrid:
2089 add (me_thrid,1) to the output, then copy the remaining entries
2090 (c) there is a next entry, and its thrid == me_thrid:
2091 copy it to the output but increment its timestamp value.
2092 Then copy the remaining entries. (c) is the common case.
2093 */
2094   tl_assert(i <= n); /* i is unsigned, so i >= 0 holds trivially */
2095 if (i == n) { /* case (a) */
sewardj7aa38a92011-02-27 23:04:12 +00002096 UInt hi = out->usedTS++;
2097 out->ts[hi].thrid = me_thrid;
2098 out->ts[hi].tym = 1;
sewardj555fc572011-02-27 23:39:53 +00002099 } else {
2100 /* cases (b) and (c) */
2101 ScalarTS* here = &vts->ts[i];
2102 if (me_thrid == here->thrid) { /* case (c) */
sewardj7aa38a92011-02-27 23:04:12 +00002103 if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
sewardje4cce742011-02-24 15:25:24 +00002104 /* We're hosed. We have to stop. */
2105 scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2106 }
sewardj7aa38a92011-02-27 23:04:12 +00002107 UInt hi = out->usedTS++;
2108 out->ts[hi].thrid = here->thrid;
2109 out->ts[hi].tym = here->tym + 1;
sewardjf98e1c02008-10-25 16:22:41 +00002110 i++;
sewardj555fc572011-02-27 23:39:53 +00002111 found = True;
2112 } else { /* case (b) */
sewardj7aa38a92011-02-27 23:04:12 +00002113 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002114 out->ts[hi].thrid = me_thrid;
2115 out->ts[hi].tym = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002116 }
sewardj555fc572011-02-27 23:39:53 +00002117 /* And copy any remaining entries. */
sewardjf98e1c02008-10-25 16:22:41 +00002118 for (/*keepgoing*/; i < n; i++) {
sewardj555fc572011-02-27 23:39:53 +00002119 ScalarTS* here2 = &vts->ts[i];
sewardj7aa38a92011-02-27 23:04:12 +00002120 UInt hi = out->usedTS++;
sewardj555fc572011-02-27 23:39:53 +00002121 out->ts[hi] = *here2;
sewardjf98e1c02008-10-25 16:22:41 +00002122 }
2123 }
sewardj555fc572011-02-27 23:39:53 +00002124
sewardj7aa38a92011-02-27 23:04:12 +00002125 tl_assert(is_sane_VTS(out));
2126 tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2127 tl_assert(out->usedTS <= out->sizeTS);
sewardjf98e1c02008-10-25 16:22:41 +00002128}
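
/* Illustrative only: the common case (c) above, worked through.
   'me' stands for any live thread; everything else is fabricated.
   Ticking { (me,5) } yields { (me,6) } -- same length, since 'me'
   was already present. */
#if 0
static void example__tick ( Thr* me )
{
   ThrID me_thrid = Thr__to_ThrID(me);
   VTS* v = VTS__new("example.v", 1);
   v->ts[0].thrid = me_thrid; v->ts[0].tym = 5;
   v->usedTS = 1;
   /* tick needs room for at most one extra entry */
   VTS* out = VTS__new("example.out", 2);
   VTS__tick(out, me, v);
   tl_assert(out->usedTS == 1);
   tl_assert(out->ts[0].thrid == me_thrid && out->ts[0].tym == 6);
   VTS__delete(out); VTS__delete(v);
}
#endif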
2129
2130
2131/* Create in 'out' a VTS constructed as the join (max) of the 2 args.
2132 Neither arg is modified.
2133*/
sewardj7aa38a92011-02-27 23:04:12 +00002134static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002135{
sewardj7aa38a92011-02-27 23:04:12 +00002136 UInt ia, ib, useda, usedb;
sewardjf98e1c02008-10-25 16:22:41 +00002137 ULong tyma, tymb, tymMax;
sewardje4cce742011-02-24 15:25:24 +00002138 ThrID thrid;
sewardj7aa38a92011-02-27 23:04:12 +00002139 UInt ncommon = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002140
sewardjc8028ad2010-05-05 09:34:42 +00002141 stats__vts__join++;
2142
sewardj7aa38a92011-02-27 23:04:12 +00002143 tl_assert(a);
2144 tl_assert(b);
2145 useda = a->usedTS;
2146 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002147
sewardj7aa38a92011-02-27 23:04:12 +00002148 tl_assert(out);
2149 tl_assert(out->usedTS == 0);
2150 /* overly conservative test, but doing better involves comparing
2151 the two VTSs, which we don't want to do at this point. */
2152 if (useda + usedb >= ThrID_MAX_VALID)
2153 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2154 tl_assert(out->sizeTS >= useda + usedb);
2155
sewardjf98e1c02008-10-25 16:22:41 +00002156 ia = ib = 0;
2157
2158 while (1) {
2159
sewardje4cce742011-02-24 15:25:24 +00002160 /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2161 from a and b in order, where thrid is the next ThrID
sewardjf98e1c02008-10-25 16:22:41 +00002162 occurring in either a or b, and tyma/b are the relevant
2163 scalar timestamps, taking into account implicit zeroes. */
2164      tl_assert(ia <= useda); /* ia is unsigned */
2165      tl_assert(ib <= usedb); /* ib is unsigned */
sewardjf98e1c02008-10-25 16:22:41 +00002166
njn4c245e52009-03-15 23:25:38 +00002167 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002168 /* both empty - done */
2169 break;
njn4c245e52009-03-15 23:25:38 +00002170
2171 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002172 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002173 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002174 thrid = tmpb->thrid;
2175 tyma = 0;
2176 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002177 ib++;
njn4c245e52009-03-15 23:25:38 +00002178
2179 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002180 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002181 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002182 thrid = tmpa->thrid;
2183 tyma = tmpa->tym;
2184 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002185 ia++;
njn4c245e52009-03-15 23:25:38 +00002186
2187 } else {
sewardje4cce742011-02-24 15:25:24 +00002188 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002189 ScalarTS* tmpa = &a->ts[ia];
2190 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002191 if (tmpa->thrid < tmpb->thrid) {
2192 /* a has the lowest unconsidered ThrID */
2193 thrid = tmpa->thrid;
2194 tyma = tmpa->tym;
2195 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002196 ia++;
sewardje4cce742011-02-24 15:25:24 +00002197 } else if (tmpa->thrid > tmpb->thrid) {
2198 /* b has the lowest unconsidered ThrID */
2199 thrid = tmpb->thrid;
2200 tyma = 0;
2201 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002202 ib++;
2203 } else {
sewardje4cce742011-02-24 15:25:24 +00002204 /* they both next mention the same ThrID */
2205 tl_assert(tmpa->thrid == tmpb->thrid);
2206 thrid = tmpa->thrid; /* == tmpb->thrid */
2207 tyma = tmpa->tym;
2208 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002209 ia++;
2210 ib++;
sewardj7aa38a92011-02-27 23:04:12 +00002211 ncommon++;
sewardjf98e1c02008-10-25 16:22:41 +00002212 }
2213 }
2214
2215 /* having laboriously determined (thr, tyma, tymb), do something
2216 useful with it. */
2217 tymMax = tyma > tymb ? tyma : tymb;
2218 if (tymMax > 0) {
sewardj7aa38a92011-02-27 23:04:12 +00002219 UInt hi = out->usedTS++;
2220 out->ts[hi].thrid = thrid;
2221 out->ts[hi].tym = tymMax;
sewardjf98e1c02008-10-25 16:22:41 +00002222 }
2223
2224 }
2225
sewardj7aa38a92011-02-27 23:04:12 +00002226 tl_assert(is_sane_VTS(out));
2227 tl_assert(out->usedTS <= out->sizeTS);
2228 tl_assert(out->usedTS == useda + usedb - ncommon);
sewardjf98e1c02008-10-25 16:22:41 +00002229}
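
/* Illustrative only: one worked join, with fabricated ThrIDs.
   Joining { (1024,5) (1026,1) } and { (1024,3) (1033,7) } takes
   the per-thread maxima, giving { (1024,5) (1026,1) (1033,7) };
   the one common thread (1024) is counted once. */
#if 0
static void example__join ( void )
{
   VTS* a = VTS__new("example.a", 2);
   a->ts[0].thrid = 1024; a->ts[0].tym = 5;
   a->ts[1].thrid = 1026; a->ts[1].tym = 1;
   a->usedTS = 2;
   VTS* b = VTS__new("example.b", 2);
   b->ts[0].thrid = 1024; b->ts[0].tym = 3;
   b->ts[1].thrid = 1033; b->ts[1].tym = 7;
   b->usedTS = 2;
   VTS* out = VTS__new("example.out", 4); /* >= useda + usedb */
   VTS__join(out, a, b);
   tl_assert(out->usedTS == 3);
   tl_assert(out->ts[0].tym == 5 && out->ts[1].tym == 1
             && out->ts[2].tym == 7);
   VTS__delete(out); VTS__delete(b); VTS__delete(a);
}
#endif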
2230
2231
sewardje4cce742011-02-24 15:25:24 +00002232/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
2233 they are, or the first ThrID for which they are not (no valid ThrID
2234 has the value zero). This rather strange convention is used
2235 because sometimes we want to know the actual index at which they
2236 first differ. */
2237static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00002238{
sewardj23f12002009-07-24 08:45:08 +00002239 Word ia, ib, useda, usedb;
2240 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00002241
sewardjc8028ad2010-05-05 09:34:42 +00002242 stats__vts__cmpLEQ++;
2243
sewardj7aa38a92011-02-27 23:04:12 +00002244 tl_assert(a);
2245 tl_assert(b);
2246 useda = a->usedTS;
2247 usedb = b->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002248
2249 ia = ib = 0;
2250
2251 while (1) {
2252
njn4c245e52009-03-15 23:25:38 +00002253 /* This logic is to enumerate doubles (tyma, tymb) drawn
2254 from a and b in order, and tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00002255 scalar timestamps, taking into account implicit zeroes. */
sewardje4cce742011-02-24 15:25:24 +00002256 ThrID thrid;
sewardj23f12002009-07-24 08:45:08 +00002257
sewardjf98e1c02008-10-25 16:22:41 +00002258 tl_assert(ia >= 0 && ia <= useda);
2259 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00002260
njn4c245e52009-03-15 23:25:38 +00002261 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002262 /* both empty - done */
2263 break;
njn4c245e52009-03-15 23:25:38 +00002264
2265 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002266 /* a empty, use up b */
sewardj7aa38a92011-02-27 23:04:12 +00002267 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002268 tyma = 0;
2269 tymb = tmpb->tym;
2270 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002271 ib++;
njn4c245e52009-03-15 23:25:38 +00002272
2273 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00002274 /* b empty, use up a */
sewardj7aa38a92011-02-27 23:04:12 +00002275 ScalarTS* tmpa = &a->ts[ia];
sewardje4cce742011-02-24 15:25:24 +00002276 tyma = tmpa->tym;
2277 thrid = tmpa->thrid;
2278 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002279 ia++;
njn4c245e52009-03-15 23:25:38 +00002280
2281 } else {
sewardje4cce742011-02-24 15:25:24 +00002282 /* both not empty; extract lowest-ThrID'd triple */
sewardj7aa38a92011-02-27 23:04:12 +00002283 ScalarTS* tmpa = &a->ts[ia];
2284 ScalarTS* tmpb = &b->ts[ib];
sewardje4cce742011-02-24 15:25:24 +00002285 if (tmpa->thrid < tmpb->thrid) {
2286 /* a has the lowest unconsidered ThrID */
2287 tyma = tmpa->tym;
2288 thrid = tmpa->thrid;
2289 tymb = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002290 ia++;
2291 }
2292 else
sewardje4cce742011-02-24 15:25:24 +00002293 if (tmpa->thrid > tmpb->thrid) {
2294 /* b has the lowest unconsidered ThrID */
2295 tyma = 0;
2296 tymb = tmpb->tym;
2297 thrid = tmpb->thrid;
sewardjf98e1c02008-10-25 16:22:41 +00002298 ib++;
2299 } else {
sewardje4cce742011-02-24 15:25:24 +00002300 /* they both next mention the same ThrID */
2301 tl_assert(tmpa->thrid == tmpb->thrid);
2302 tyma = tmpa->tym;
2303 thrid = tmpa->thrid;
2304 tymb = tmpb->tym;
sewardjf98e1c02008-10-25 16:22:41 +00002305 ia++;
2306 ib++;
2307 }
2308 }
2309
njn4c245e52009-03-15 23:25:38 +00002310 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00002311 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00002312 if (tyma > tymb) {
2313 /* not LEQ at this index. Quit, since the answer is
2314 determined already. */
sewardje4cce742011-02-24 15:25:24 +00002315 tl_assert(thrid >= 1024);
2316 return thrid;
sewardj23f12002009-07-24 08:45:08 +00002317 }
sewardjf98e1c02008-10-25 16:22:41 +00002318 }
2319
sewardje4cce742011-02-24 15:25:24 +00002320 return 0; /* all points are LEQ => return an invalid ThrID */
sewardjf98e1c02008-10-25 16:22:41 +00002321}
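
/* Illustrative only: LEQ(A,B) holds iff A's clock for every thread
   is <= B's, with threads missing from a VTS reading as zero. With
   fabricated ThrIDs: { (1024,2) } <= { (1024,2) (1030,1) }, but not
   the other way around, and the failure is reported at 1030. */
#if 0
static void example__cmpLEQ ( void )
{
   VTS* a = VTS__new("example.a", 1);
   a->ts[0].thrid = 1024; a->ts[0].tym = 2;
   a->usedTS = 1;
   VTS* b = VTS__new("example.b", 2);
   b->ts[0].thrid = 1024; b->ts[0].tym = 2;
   b->ts[1].thrid = 1030; b->ts[1].tym = 1;
   b->usedTS = 2;
   tl_assert(VTS__cmpLEQ(a, b) == 0);    /* LEQ holds */
   tl_assert(VTS__cmpLEQ(b, a) == 1030); /* fails at thread 1030 */
   VTS__delete(b); VTS__delete(a);
}
#endif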
2322
2323
2324/* Compute an arbitrary structural (total) ordering on the two args,
2325 based on their VCs, so they can be looked up in a table, tree, etc.
sewardjc8028ad2010-05-05 09:34:42 +00002326 Returns -1, 0 or 1. (really just 'deriving Ord' :-) This can be
2327   performance critical, so there is some effort expended to make it as
2328 fast as possible.
sewardjf98e1c02008-10-25 16:22:41 +00002329*/
2330Word VTS__cmp_structural ( VTS* a, VTS* b )
2331{
2332 /* We just need to generate an arbitrary total ordering based on
2333 a->ts and b->ts. Preferably do it in a way which comes across likely
2334 differences relatively quickly. */
sewardjc8028ad2010-05-05 09:34:42 +00002335 Word i;
2336 Word useda = 0, usedb = 0;
2337 ScalarTS *ctsa = NULL, *ctsb = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002338
sewardjc8028ad2010-05-05 09:34:42 +00002339 stats__vts__cmp_structural++;
2340
2341 tl_assert(a);
2342 tl_assert(b);
2343
sewardj7aa38a92011-02-27 23:04:12 +00002344 ctsa = &a->ts[0]; useda = a->usedTS;
2345 ctsb = &b->ts[0]; usedb = b->usedTS;
sewardjc8028ad2010-05-05 09:34:42 +00002346
2347 if (LIKELY(useda == usedb)) {
2348 ScalarTS *tmpa = NULL, *tmpb = NULL;
2349 stats__vts__cmp_structural_slow++;
2350 /* Same length vectors. Find the first difference, if any, as
2351 fast as possible. */
2352 for (i = 0; i < useda; i++) {
2353 tmpa = &ctsa[i];
2354 tmpb = &ctsb[i];
sewardje4cce742011-02-24 15:25:24 +00002355 if (LIKELY(tmpa->tym == tmpb->tym
2356 && tmpa->thrid == tmpb->thrid))
sewardjc8028ad2010-05-05 09:34:42 +00002357 continue;
2358 else
2359 break;
2360 }
2361 if (UNLIKELY(i == useda)) {
2362 /* They're identical. */
2363 return 0;
2364 } else {
2365 tl_assert(i >= 0 && i < useda);
2366 if (tmpa->tym < tmpb->tym) return -1;
2367 if (tmpa->tym > tmpb->tym) return 1;
sewardje4cce742011-02-24 15:25:24 +00002368 if (tmpa->thrid < tmpb->thrid) return -1;
2369 if (tmpa->thrid > tmpb->thrid) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002370 /* we just established them as non-identical, hence: */
2371 }
2372 /*NOTREACHED*/
2373 tl_assert(0);
2374 }
sewardjf98e1c02008-10-25 16:22:41 +00002375
2376 if (useda < usedb) return -1;
2377 if (useda > usedb) return 1;
sewardjc8028ad2010-05-05 09:34:42 +00002378 /*NOTREACHED*/
2379 tl_assert(0);
sewardjf98e1c02008-10-25 16:22:41 +00002380}
2381
2382
2383/* Debugging only. Display the given VTS in the buffer.
2384*/
sewardj7aa38a92011-02-27 23:04:12 +00002385void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
2386{
sewardjf98e1c02008-10-25 16:22:41 +00002387 ScalarTS* st;
2388 HChar unit[64];
2389 Word i, n;
2390 Int avail = nBuf;
2391 tl_assert(vts && vts->ts);
2392 tl_assert(nBuf > 16);
2393 buf[0] = '[';
2394 buf[1] = 0;
sewardj7aa38a92011-02-27 23:04:12 +00002395 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002396 for (i = 0; i < n; i++) {
2397 tl_assert(avail >= 40);
sewardj7aa38a92011-02-27 23:04:12 +00002398 st = &vts->ts[i];
sewardjf98e1c02008-10-25 16:22:41 +00002399 VG_(memset)(unit, 0, sizeof(unit));
sewardje4cce742011-02-24 15:25:24 +00002400 VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
2401 st->thrid, (ULong)st->tym);
sewardjf98e1c02008-10-25 16:22:41 +00002402 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
2403 VG_(strcat)(buf, " ...]");
2404 buf[nBuf-1] = 0;
2405 return;
2406 }
2407 VG_(strcat)(buf, unit);
2408 avail -= VG_(strlen)(unit);
2409 }
2410 VG_(strcat)(buf, "]");
2411 buf[nBuf-1] = 0;
2412}
2413
2414
2415/* Debugging only. Return vts[index], so to speak.
2416*/
sewardj7aa38a92011-02-27 23:04:12 +00002417ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2418{
sewardjf98e1c02008-10-25 16:22:41 +00002419 UWord i, n;
sewardje4cce742011-02-24 15:25:24 +00002420 ThrID idx_thrid = Thr__to_ThrID(idx);
sewardjc8028ad2010-05-05 09:34:42 +00002421 stats__vts__indexat_slow++;
sewardjf98e1c02008-10-25 16:22:41 +00002422 tl_assert(vts && vts->ts);
sewardj7aa38a92011-02-27 23:04:12 +00002423 n = vts->usedTS;
sewardjf98e1c02008-10-25 16:22:41 +00002424 for (i = 0; i < n; i++) {
sewardj7aa38a92011-02-27 23:04:12 +00002425 ScalarTS* st = &vts->ts[i];
sewardje4cce742011-02-24 15:25:24 +00002426 if (st->thrid == idx_thrid)
sewardjf98e1c02008-10-25 16:22:41 +00002427 return st->tym;
2428 }
2429 return 0;
2430}
2431
2432
sewardjffce8152011-06-24 10:09:41 +00002433/* See comment on prototype above.
2434*/
2435static void VTS__declare_thread_very_dead ( Thr* thr )
2436{
2437 if (0) VG_(printf)("VTQ: tae %p\n", thr);
2438
2439 tl_assert(thr->llexit_done);
2440 tl_assert(thr->joinedwith_done);
2441
2442 ThrID nyu;
2443 nyu = Thr__to_ThrID(thr);
2444 VG_(addToXA)( verydead_thread_table, &nyu );
2445
2446 /* We can only get here if we're assured that we'll never again
2447 need to look at this thread's ::viR or ::viW. Set them to
2448 VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2449 mostly so that we don't wind up pruning them (as that would be
2450 nonsensical: the only interesting ScalarTS entry for a dead
2451      thread is its own index, and the pruning will remove that). */
2452 VtsID__rcdec(thr->viR);
2453 VtsID__rcdec(thr->viW);
2454 thr->viR = VtsID_INVALID;
2455 thr->viW = VtsID_INVALID;
2456}
2457
2458
sewardjf98e1c02008-10-25 16:22:41 +00002459/////////////////////////////////////////////////////////////////
2460/////////////////////////////////////////////////////////////////
2461// //
2462// SECTION END vts primitives //
2463// //
2464/////////////////////////////////////////////////////////////////
2465/////////////////////////////////////////////////////////////////
2466
2467
2468
2469/////////////////////////////////////////////////////////////////
2470/////////////////////////////////////////////////////////////////
2471// //
2472// SECTION BEGIN main library //
2473// //
2474/////////////////////////////////////////////////////////////////
2475/////////////////////////////////////////////////////////////////
2476
2477
2478/////////////////////////////////////////////////////////
2479// //
2480// VTS set //
2481// //
2482/////////////////////////////////////////////////////////
2483
sewardjffce8152011-06-24 10:09:41 +00002484static WordFM* /* WordFM VTS* void */ vts_set = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002485
2486static void vts_set_init ( void )
2487{
2488 tl_assert(!vts_set);
2489 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2490 HG_(free),
2491 (Word(*)(UWord,UWord))VTS__cmp_structural );
2492 tl_assert(vts_set);
2493}
2494
sewardj7aa38a92011-02-27 23:04:12 +00002495/* Given a VTS, look in vts_set to see if we already have a
2496 structurally identical one. If yes, return the pair (True, pointer
2497 to the existing one). If no, clone this one, add the clone to the
2498 set, and return (False, pointer to the clone). */
2499static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002500{
2501 UWord keyW, valW;
sewardj7aa38a92011-02-27 23:04:12 +00002502 stats__vts_set__focaa++;
2503 tl_assert(cand->id == VtsID_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00002504 /* lookup cand (by value) */
2505 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2506 /* found it */
2507 tl_assert(valW == 0);
2508 /* if this fails, cand (by ref) was already present (!) */
2509 tl_assert(keyW != (UWord)cand);
sewardj7aa38a92011-02-27 23:04:12 +00002510 *res = (VTS*)keyW;
2511 return True;
sewardjf98e1c02008-10-25 16:22:41 +00002512 } else {
sewardj7aa38a92011-02-27 23:04:12 +00002513 /* not present. Clone, add and return address of clone. */
2514 stats__vts_set__focaa_a++;
2515 VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2516 tl_assert(clone != cand);
2517 VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2518 *res = clone;
2519 return False;
sewardjf98e1c02008-10-25 16:22:41 +00002520 }
2521}
2522
2523
2524/////////////////////////////////////////////////////////
2525// //
2526// VTS table //
2527// //
2528/////////////////////////////////////////////////////////
2529
2530static void VtsID__invalidate_caches ( void ); /* fwds */
2531
2532/* A type to hold VTS table entries. Invariants:
2533 If .vts == NULL, then this entry is not in use, so:
2534 - .rc == 0
2535 - this entry is on the freelist (unfortunately, does not imply
sewardjffce8152011-06-24 10:09:41 +00002536 any constraints on value for .freelink)
sewardjf98e1c02008-10-25 16:22:41 +00002537 If .vts != NULL, then this entry is in use:
2538 - .vts is findable in vts_set
2539 - .vts->id == this entry number
2540 - no specific value for .rc (even 0 is OK)
sewardjffce8152011-06-24 10:09:41 +00002541 - this entry is not on freelist, so .freelink == VtsID_INVALID
sewardjf98e1c02008-10-25 16:22:41 +00002542*/
2543typedef
2544 struct {
2545 VTS* vts; /* vts, in vts_set */
2546 UWord rc; /* reference count - enough for entire aspace */
2547 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
sewardjffce8152011-06-24 10:09:41 +00002548 VtsID remap; /* used only during pruning */
sewardjf98e1c02008-10-25 16:22:41 +00002549 }
2550 VtsTE;
2551
2552/* The VTS table. */
2553static XArray* /* of VtsTE */ vts_tab = NULL;
2554
2555/* An index into the VTS table, indicating the start of the list of
2556 free (available for use) entries. If the list is empty, this is
2557 VtsID_INVALID. */
2558static VtsID vts_tab_freelist = VtsID_INVALID;
2559
2560/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2561 vts_tab equals or exceeds this size. After GC, the value here is
2562 set appropriately so as to check for the next GC point. */
2563static Word vts_next_GC_at = 1000;
2564
2565static void vts_tab_init ( void )
2566{
2567 vts_tab
2568 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2569 HG_(free), sizeof(VtsTE) );
2570 vts_tab_freelist
2571 = VtsID_INVALID;
2572 tl_assert(vts_tab);
2573}
2574
2575/* Add ii to the free list, checking that it looks out-of-use. */
2576static void add_to_free_list ( VtsID ii )
2577{
2578 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2579 tl_assert(ie->vts == NULL);
2580 tl_assert(ie->rc == 0);
2581 tl_assert(ie->freelink == VtsID_INVALID);
2582 ie->freelink = vts_tab_freelist;
2583 vts_tab_freelist = ii;
2584}
2585
2586/* Get an entry from the free list. This will return VtsID_INVALID if
2587 the free list is empty. */
2588static VtsID get_from_free_list ( void )
2589{
2590 VtsID ii;
2591 VtsTE* ie;
2592 if (vts_tab_freelist == VtsID_INVALID)
2593 return VtsID_INVALID;
2594 ii = vts_tab_freelist;
2595 ie = VG_(indexXA)( vts_tab, ii );
2596 tl_assert(ie->vts == NULL);
2597 tl_assert(ie->rc == 0);
2598 vts_tab_freelist = ie->freelink;
2599 return ii;
2600}
2601
2602/* Produce a new VtsID that can be used, either by getting it from
2603 the freelist, or, if that is empty, by expanding vts_tab. */
2604static VtsID get_new_VtsID ( void )
2605{
2606 VtsID ii;
2607 VtsTE te;
2608 ii = get_from_free_list();
2609 if (ii != VtsID_INVALID)
2610 return ii;
2611 te.vts = NULL;
2612 te.rc = 0;
2613 te.freelink = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00002614 te.remap = VtsID_INVALID;
sewardjf98e1c02008-10-25 16:22:41 +00002615 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2616 return ii;
2617}
2618
2619
2620/* Indirect callback from lib_zsm. */
2621static void VtsID__rcinc ( VtsID ii )
2622{
2623 VtsTE* ie;
2624 /* VG_(indexXA) does a range check for us */
2625 ie = VG_(indexXA)( vts_tab, ii );
2626 tl_assert(ie->vts); /* else it's not in use */
2627 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2628 tl_assert(ie->vts->id == ii);
2629 ie->rc++;
2630}
2631
2632/* Indirect callback from lib_zsm. */
2633static void VtsID__rcdec ( VtsID ii )
2634{
2635 VtsTE* ie;
2636 /* VG_(indexXA) does a range check for us */
2637 ie = VG_(indexXA)( vts_tab, ii );
2638 tl_assert(ie->vts); /* else it's not in use */
2639 tl_assert(ie->rc > 0); /* else RC snafu */
2640 tl_assert(ie->vts->id == ii);
2641 ie->rc--;
2642}
2643
2644
sewardj7aa38a92011-02-27 23:04:12 +00002645/* Look up 'cand' in our collection of VTSs. If present, return the
2646 VtsID for the pre-existing version. If not present, clone it, add
2647 the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2648 it, and return that. */
2649static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
sewardjf98e1c02008-10-25 16:22:41 +00002650{
sewardj7aa38a92011-02-27 23:04:12 +00002651 VTS* in_tab = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002652 tl_assert(cand->id == VtsID_INVALID);
sewardj7aa38a92011-02-27 23:04:12 +00002653 Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2654 tl_assert(in_tab);
2655 if (already_have) {
2656 /* We already have a copy of 'cand'. Use that. */
sewardjf98e1c02008-10-25 16:22:41 +00002657 VtsTE* ie;
sewardj7aa38a92011-02-27 23:04:12 +00002658 tl_assert(in_tab->id != VtsID_INVALID);
2659 ie = VG_(indexXA)( vts_tab, in_tab->id );
2660 tl_assert(ie->vts == in_tab);
2661 return in_tab->id;
sewardjf98e1c02008-10-25 16:22:41 +00002662 } else {
2663 VtsID ii = get_new_VtsID();
2664 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
sewardj7aa38a92011-02-27 23:04:12 +00002665 ie->vts = in_tab;
sewardjf98e1c02008-10-25 16:22:41 +00002666 ie->rc = 0;
2667 ie->freelink = VtsID_INVALID;
sewardj7aa38a92011-02-27 23:04:12 +00002668 in_tab->id = ii;
sewardjf98e1c02008-10-25 16:22:41 +00002669 return ii;
2670 }
2671}
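
/* Illustrative only: the usage pattern these interfaces suggest --
   compute into a scratch VTS whose .id is still VtsID_INVALID (as
   VTS__new leaves it; see the comment above the VTS struct), intern
   it to obtain a stable VtsID, then discard the scratch copy, since
   the table keeps its own clone. The library itself appears to use
   a single preallocated scratch for this (temp_max_sized_VTS,
   below). */
#if 0
static VtsID example__intern_join ( VTS* a, VTS* b )
{
   VTS* scratch = VTS__new("example.scratch", a->usedTS + b->usedTS);
   VTS__join(scratch, a, b);
   VtsID id = vts_tab__find__or__clone_and_add(scratch);
   VTS__delete(scratch);
   return id;
}
#endif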
2672
2673
2674static void show_vts_stats ( HChar* caller )
2675{
2676 UWord nSet, nTab, nLive;
2677 ULong totrc;
2678 UWord n, i;
2679 nSet = VG_(sizeFM)( vts_set );
2680 nTab = VG_(sizeXA)( vts_tab );
2681 totrc = 0;
2682 nLive = 0;
2683 n = VG_(sizeXA)( vts_tab );
2684 for (i = 0; i < n; i++) {
2685 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2686 if (ie->vts) {
2687 nLive++;
2688 totrc += (ULong)ie->rc;
2689 } else {
2690 tl_assert(ie->rc == 0);
2691 }
2692 }
2693 VG_(printf)(" show_vts_stats %s\n", caller);
2694 VG_(printf)(" vts_tab size %4lu\n", nTab);
2695 VG_(printf)(" vts_tab live %4lu\n", nLive);
2696 VG_(printf)(" vts_set size %4lu\n", nSet);
2697 VG_(printf)(" total rc %4llu\n", totrc);
2698}
2699
sewardjffce8152011-06-24 10:09:41 +00002700
2701/* --- Helpers for VtsID pruning --- */
2702
2703static
2704void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2705 /*MOD*/XArray* /* of VtsTE */ new_tab,
2706 VtsID* ii )
2707{
2708 VtsTE *old_te, *new_te;
2709 VtsID old_id, new_id;
2710 /* We're relying here on VG_(indexXA)'s range checking to assert on
2711 any stupid values, in particular *ii == VtsID_INVALID. */
2712 old_id = *ii;
2713 old_te = VG_(indexXA)( old_tab, old_id );
2714 old_te->rc--;
2715 new_id = old_te->remap;
2716 new_te = VG_(indexXA)( new_tab, new_id );
2717 new_te->rc++;
2718 *ii = new_id;
2719}
2720
2721static
2722void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2723 /*MOD*/XArray* /* of VtsTE */ new_tab,
2724 SVal* s )
2725{
2726 SVal old_sv, new_sv;
2727 old_sv = *s;
2728 if (SVal__isC(old_sv)) {
2729 VtsID rMin, wMin;
2730 rMin = SVal__unC_Rmin(old_sv);
2731 wMin = SVal__unC_Wmin(old_sv);
2732 remap_VtsID( old_tab, new_tab, &rMin );
2733 remap_VtsID( old_tab, new_tab, &wMin );
2734 new_sv = SVal__mkC( rMin, wMin );
2735 *s = new_sv;
2736 }
2737}
2738
2739
sewardjf98e1c02008-10-25 16:22:41 +00002740/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002741__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002742static void vts_tab__do_GC ( Bool show_stats )
2743{
2744 UWord i, nTab, nLive, nFreed;
2745
sewardjffce8152011-06-24 10:09:41 +00002746 /* ---------- BEGIN VTS GC ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00002747 /* check this is actually necessary. */
2748 tl_assert(vts_tab_freelist == VtsID_INVALID);
2749
2750 /* empty the caches for partial order checks and binary joins. We
2751 could do better and prune out the entries to be deleted, but it
2752 ain't worth the hassle. */
2753 VtsID__invalidate_caches();
2754
2755 /* First, make the reference counts up to date. */
2756 zsm_flush_cache();
2757
2758 nTab = VG_(sizeXA)( vts_tab );
2759
2760 if (show_stats) {
2761 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2762 show_vts_stats("before GC");
2763 }
2764
sewardjffce8152011-06-24 10:09:41 +00002765 /* Now we can inspect the entire vts_tab. Any entries with zero
2766 .rc fields are now no longer in use and can be put back on the
sewardjf98e1c02008-10-25 16:22:41 +00002767 free list, removed from vts_set, and deleted. */
2768 nFreed = 0;
2769 for (i = 0; i < nTab; i++) {
2770 Bool present;
sewardjffce8152011-06-24 10:09:41 +00002771 UWord oldK = 0, oldV = 12345;
sewardjf98e1c02008-10-25 16:22:41 +00002772 VtsTE* te = VG_(indexXA)( vts_tab, i );
2773 if (te->vts == NULL) {
2774 tl_assert(te->rc == 0);
2775 continue; /* already on the free list (presumably) */
2776 }
2777 if (te->rc > 0)
2778 continue; /* in use */
2779 /* Ok, we got one we can free. */
2780 tl_assert(te->vts->id == i);
2781 /* first, remove it from vts_set. */
2782 present = VG_(delFromFM)( vts_set,
2783 &oldK, &oldV, (UWord)te->vts );
2784 tl_assert(present); /* else it isn't in vts_set ?! */
2785 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2786 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2787 /* now free the VTS itself */
2788 VTS__delete(te->vts);
2789 te->vts = NULL;
2790 /* and finally put this entry on the free list */
2791 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2792 add_to_free_list( i );
2793 nFreed++;
2794 }
2795
2796 /* Now figure out when the next GC should be. We'll allow the
2797 number of VTSs to double before GCing again. Except of course
2798 that since we can't (or, at least, don't) shrink vts_tab, we
2799      can't set the threshold value smaller than it. */
2800 tl_assert(nFreed <= nTab);
2801 nLive = nTab - nFreed;
2802   tl_assert(nLive <= nTab); /* nLive is unsigned */
2803 vts_next_GC_at = 2 * nLive;
2804 if (vts_next_GC_at < nTab)
2805 vts_next_GC_at = nTab;
2806
2807 if (show_stats) {
2808 show_vts_stats("after GC");
2809 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2810 }
2811
sewardj5e2ac3b2009-08-11 10:39:25 +00002812 if (VG_(clo_stats)) {
sewardjffce8152011-06-24 10:09:41 +00002813 static UInt ctr = 1;
sewardjf98e1c02008-10-25 16:22:41 +00002814 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002815 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002816 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002817 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002818 }
sewardjffce8152011-06-24 10:09:41 +00002819 /* ---------- END VTS GC ---------- */
2820
2821 /* Decide whether to do VTS pruning. We have one of three
2822 settings. */
2823 static UInt pruning_auto_ctr = 0; /* do not make non-static */
2824
2825 Bool do_pruning = False;
2826 switch (HG_(clo_vts_pruning)) {
2827 case 0: /* never */
2828 break;
2829 case 1: /* auto */
2830 do_pruning = (++pruning_auto_ctr % 5) == 0;
2831 break;
2832 case 2: /* always */
2833 do_pruning = True;
2834 break;
2835 default:
2836 tl_assert(0);
2837 }
2838
2839 /* The rest of this routine only handles pruning, so we can
2840 quit at this point if it is not to be done. */
2841 if (!do_pruning)
2842 return;
2843
2844 /* ---------- BEGIN VTS PRUNING ---------- */
2845   /* We begin by sorting the dead-thread table on its ThrID values, so as
2846 to (1) check they are unique [else something has gone wrong,
2847 since it means we must have seen some Thr* exiting more than
2848 once, which can't happen], and (2) so that we can quickly look
2849 up the dead-thread entries as we work through the VTSs. */
2850 VG_(sortXA)( verydead_thread_table );
2851   /* Sanity check: check for unique ThrID values. */
2852 UWord nBT = VG_(sizeXA)( verydead_thread_table );
2853 if (nBT > 0) {
2854 ThrID thrid1, thrid2;
2855 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
2856 for (i = 1; i < nBT; i++) {
2857 thrid1 = thrid2;
2858 thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
2859 tl_assert(thrid1 < thrid2);
2860 }
2861 }
2862 /* Ok, so the dead thread table has unique and in-order keys. */
2863
2864 /* We will run through the old table, and create a new table and
2865 set, at the same time setting the .remap entries in the old
2866 table to point to the new entries. Then, visit every VtsID in
2867 the system, and replace all of them with new ones, using the
2868 .remap entries in the old table. Finally, we can delete the old
2869 table and set. */
2870
2871 XArray* /* of VtsTE */ new_tab
2872 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2873 HG_(free), sizeof(VtsTE) );
2874
2875 /* WordFM VTS* void */
2876 WordFM* new_set
2877 = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2878 HG_(free),
2879 (Word(*)(UWord,UWord))VTS__cmp_structural );
2880
2881 /* Visit each old VTS. For each one:
2882
2883 * make a pruned version
2884
2885 * search new_set for the pruned version, yielding either
2886 Nothing (not present) or the new VtsID for it.
2887
2888 * if not present, allocate a new VtsID for it, insert (pruned
2889 VTS, new VtsID) in the tree, and set
2890 remap_table[old VtsID] = new VtsID.
2891
2892 * if present, set remap_table[old VtsID] = new VtsID, where
2893 new VtsID was determined by the tree lookup. Then free up
2894 the clone.
2895 */
2896
2897 UWord nBeforePruning = 0, nAfterPruning = 0;
2898 UWord nSTSsBefore = 0, nSTSsAfter = 0;
2899 VtsID new_VtsID_ctr = 0;
2900
2901 for (i = 0; i < nTab; i++) {
2902
2903 /* For each old VTS .. */
2904 VtsTE* old_te = VG_(indexXA)( vts_tab, i );
2905 VTS* old_vts = old_te->vts;
2906 tl_assert(old_te->remap == VtsID_INVALID);
2907
2908 /* Skip it if not in use */
2909 if (old_te->rc == 0) {
2910 tl_assert(old_vts == NULL);
2911 continue;
2912 }
2913 tl_assert(old_vts != NULL);
2914 tl_assert(old_vts->id == i);
2915 tl_assert(old_vts->ts != NULL);
2916
2917 /* It is in use. Make a pruned version. */
2918 nBeforePruning++;
2919 nSTSsBefore += old_vts->usedTS;
2920 VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
2921 old_vts, verydead_thread_table);
2922 tl_assert(new_vts->sizeTS == new_vts->usedTS);
2923 tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2924 == 0x0ddC0ffeeBadF00dULL);
2925
2926 /* Get rid of the old VTS and the tree entry. It's a bit more
2927 complex to incrementally delete the VTSs now than to nuke
2928 them all after we're done, but the upside is that we don't
2929 wind up temporarily storing potentially two complete copies
2930 of each VTS and hence spiking memory use. */
2931 UWord oldK = 0, oldV = 12345;
2932 Bool present = VG_(delFromFM)( vts_set,
2933 &oldK, &oldV, (UWord)old_vts );
2934 tl_assert(present); /* else it isn't in vts_set ?! */
2935 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2936 tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2937 /* now free the VTS itself */
2938 VTS__delete(old_vts);
2939 old_te->vts = NULL;
2940 old_vts = NULL;
2941
2942 /* NO MENTIONS of old_vts allowed beyond this point. */
2943
2944 /* Ok, we have the pruned copy in new_vts. See if a
2945 structurally identical version is already present in new_set.
2946 If so, delete the one we just made and move on; if not, add
2947 it. */
2948 VTS* identical_version = NULL;
2949 UWord valW = 12345;
2950 if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2951 (UWord)new_vts)) {
2952 // already have it
2953 tl_assert(valW == 0);
2954 tl_assert(identical_version != NULL);
2955 tl_assert(identical_version != new_vts);
2956 VTS__delete(new_vts);
2957 new_vts = identical_version;
2958 tl_assert(new_vts->id != VtsID_INVALID);
2959 } else {
2960 tl_assert(valW == 12345);
2961 tl_assert(identical_version == NULL);
2962 new_vts->id = new_VtsID_ctr++;
2963 Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2964 tl_assert(!b);
2965 VtsTE new_te;
2966 new_te.vts = new_vts;
2967 new_te.rc = 0;
2968 new_te.freelink = VtsID_INVALID;
2969 new_te.remap = VtsID_INVALID;
2970 Word j = VG_(addToXA)( new_tab, &new_te );
2971 tl_assert(j <= i);
2972 tl_assert(j == new_VtsID_ctr - 1);
2973 // stats
2974 nAfterPruning++;
2975 nSTSsAfter += new_vts->usedTS;
2976 }
2977 old_te->remap = new_vts->id;
2978
2979 } /* for (i = 0; i < nTab; i++) */
2980
2981 /* At this point, we have:
2982 * the old VTS table, with its .remap entries set,
2983 and with all .vts == NULL.
2984 * the old VTS tree should be empty, since it and the old VTSs
2985      it contained have been incrementally deleted as we worked
2986 through the old table.
2987 * the new VTS table, with all .rc == 0, all .freelink and .remap
2988 == VtsID_INVALID.
2989 * the new VTS tree.
2990 */
2991 tl_assert( VG_(sizeFM)(vts_set) == 0 );
2992
2993 /* Now actually apply the mapping. */
2994 /* Visit all the VtsIDs in the entire system. Where do we expect
2995 to find them?
2996 (a) in shadow memory -- the LineZs and LineFs
2997 (b) in our collection of struct _Thrs.
2998 (c) in our collection of struct _SOs.
2999 Nowhere else, AFAICS. Not in the zsm cache, because that just
3000 got invalidated.
3001
3002 Using the .remap fields in vts_tab, map each old VtsID to a new
3003 VtsID. For each old VtsID, dec its rc; and for each new one,
3004 inc it. This sets up the new refcounts, and it also gives a
3005 cheap sanity check of the old ones: all old refcounts should be
3006 zero after this operation.
3007 */
3008
3009 /* Do the mappings for (a) above: iterate over the Primary shadow
3010 mem map (WordFM Addr SecMap*). */
3011 UWord secmapW = 0;
3012 VG_(initIterFM)( map_shmem );
3013 while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3014 UWord j;
3015 SecMap* sm = (SecMap*)secmapW;
3016 tl_assert(sm->magic == SecMap_MAGIC);
3017 /* Deal with the LineZs */
3018 for (i = 0; i < N_SECMAP_ZLINES; i++) {
3019 LineZ* lineZ = &sm->linesZ[i];
3020 if (lineZ->dict[0] == SVal_INVALID)
3021 continue; /* not in use -- data is in F rep instead */
3022 for (j = 0; j < 4; j++)
3023 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3024 }
3025 /* Deal with the LineFs */
3026 for (i = 0; i < sm->linesF_size; i++) {
3027 LineF* lineF = &sm->linesF[i];
3028 if (!lineF->inUse)
3029 continue;
3030 for (j = 0; j < N_LINE_ARANGE; j++)
3031 remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3032 }
3033 }
3034 VG_(doneIterFM)( map_shmem );
3035
3036 /* Do the mappings for (b) above: visit our collection of struct
3037 _Thrs. */
3038 Thread* hgthread = get_admin_threads();
3039 tl_assert(hgthread);
3040 while (hgthread) {
3041 Thr* hbthr = hgthread->hbthr;
3042 tl_assert(hbthr);
3043 /* Threads that are listed in the prunable set have their viR
3044 and viW set to VtsID_INVALID, so we can't mess with them. */
3045 if (hbthr->llexit_done && hbthr->joinedwith_done) {
3046 tl_assert(hbthr->viR == VtsID_INVALID);
3047 tl_assert(hbthr->viW == VtsID_INVALID);
3048 hgthread = hgthread->admin;
3049 continue;
3050 }
3051 remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3052 remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3053 hgthread = hgthread->admin;
3054 }
3055
3056 /* Do the mappings for (c) above: visit the struct _SOs. */
3057 SO* so = admin_SO;
3058 while (so) {
3059 if (so->viR != VtsID_INVALID)
3060 remap_VtsID( vts_tab, new_tab, &so->viR );
3061 if (so->viW != VtsID_INVALID)
3062 remap_VtsID( vts_tab, new_tab, &so->viW );
3063 so = so->admin_next;
3064 }
3065
3066 /* So, we're nearly done (with this incredibly complex operation).
3067 Check the refcounts for the old VtsIDs all fell to zero, as
3068 expected. Any failure is serious. */
3069 for (i = 0; i < nTab; i++) {
3070 VtsTE* te = VG_(indexXA)( vts_tab, i );
3071 tl_assert(te->vts == NULL);
3072 /* This is the assert proper. Note we're also asserting
3073 zeroness for old entries which are unmapped (hence have
3074 .remap == VtsID_INVALID). That's OK. */
3075 tl_assert(te->rc == 0);
3076 }
3077
3078 /* Install the new table and set. */
3079 VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3080 vts_set = new_set;
3081 VG_(deleteXA)( vts_tab );
3082 vts_tab = new_tab;
3083
3084 /* The freelist of vts_tab entries is empty now, because we've
3085 compacted all of the live entries at the low end of the
3086 table. */
3087 vts_tab_freelist = VtsID_INVALID;
3088
3089 /* Sanity check vts_set and vts_tab. */
3090
3091 /* Because all the live entries got slid down to the bottom of vts_tab: */
3092 tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3093
3094 /* Assert that the vts_tab and vts_set entries point at each other
3095 in the required way */
3096 UWord wordK = 0, wordV = 0;
3097 VG_(initIterFM)( vts_set );
3098 while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3099 tl_assert(wordK != 0);
3100 tl_assert(wordV == 0);
3101 VTS* vts = (VTS*)wordK;
3102 tl_assert(vts->id != VtsID_INVALID);
3103 VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3104 tl_assert(te->vts == vts);
3105 }
3106 VG_(doneIterFM)( vts_set );
3107
3108 /* Also iterate over the table, and check each entry is
3109 plausible. */
3110 nTab = VG_(sizeXA)( vts_tab );
3111 for (i = 0; i < nTab; i++) {
3112 VtsTE* te = VG_(indexXA)( vts_tab, i );
3113 tl_assert(te->vts);
3114 tl_assert(te->vts->id == i);
3115 tl_assert(te->rc > 0); /* 'cos we just GC'd */
3116 tl_assert(te->freelink == VtsID_INVALID); /* in use */
3117 tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3118 }
3119
3120 /* And we're done. Bwahahaha. Ha. Ha. Ha. */
3121 if (VG_(clo_stats)) {
3122 static UInt ctr = 1;
3123 tl_assert(nTab > 0);
3124 VG_(message)(
3125 Vg_DebugMsg,
3126 "libhb: VTS PR: #%u before %lu (avg sz %lu) "
3127 "after %lu (avg sz %lu)\n",
3128 ctr++,
3129 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3130 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3131 );
3132 }
3133 if (0)
3134 VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
3135 "after pruning %lu (avg sz %lu)\n",
3136 nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3137 nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1));
3138 /* ---------- END VTS PRUNING ---------- */
sewardjf98e1c02008-10-25 16:22:41 +00003139}
3140
3141
3142/////////////////////////////////////////////////////////
3143// //
3144// Vts IDs //
3145// //
3146/////////////////////////////////////////////////////////
3147
3148//////////////////////////
sewardj7aa38a92011-02-27 23:04:12 +00003149/* A max-sized VTS, used as scratch space (the first
3150 argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3151static VTS* temp_max_sized_VTS = NULL;
3152
3153//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00003154static ULong stats__cmpLEQ_queries = 0;
3155static ULong stats__cmpLEQ_misses = 0;
3156static ULong stats__join2_queries = 0;
3157static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003158
3159static inline UInt ROL32 ( UInt w, Int n ) {
3160 w = (w << n) | (w >> (32-n));
3161 return w;
3162}
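/* e.g. ROL32(0x80000001, 4) == 0x00000018: bit 31 rotates round to
   bit 3, and bit 0 moves up to bit 4. */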
3163static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3164 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3165 return hash % nTab;
3166}
3167
sewardj23f12002009-07-24 08:45:08 +00003168#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00003169static
sewardj23f12002009-07-24 08:45:08 +00003170 struct { VtsID vi1; VtsID vi2; Bool leq; }
3171 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00003172
3173#define N_JOIN2_CACHE 1023
3174static
3175 struct { VtsID vi1; VtsID vi2; VtsID res; }
3176 join2_cache[N_JOIN2_CACHE];
3177
3178static void VtsID__invalidate_caches ( void ) {
3179 Int i;
sewardj23f12002009-07-24 08:45:08 +00003180 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3181 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3182 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3183 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00003184 }
3185 for (i = 0; i < N_JOIN2_CACHE; i++) {
3186 join2_cache[i].vi1 = VtsID_INVALID;
3187 join2_cache[i].vi2 = VtsID_INVALID;
3188 join2_cache[i].res = VtsID_INVALID;
3189 }
3190}
3191//////////////////////////
3192
sewardjd52392d2008-11-08 20:36:26 +00003193//static Bool VtsID__is_valid ( VtsID vi ) {
3194// VtsTE* ve;
3195// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3196// return False;
3197// ve = VG_(indexXA)( vts_tab, vi );
3198// if (!ve->vts)
3199// return False;
3200// tl_assert(ve->vts->id == vi);
3201// return True;
3202//}
sewardjf98e1c02008-10-25 16:22:41 +00003203
3204static VTS* VtsID__to_VTS ( VtsID vi ) {
3205 VtsTE* te = VG_(indexXA)( vts_tab, vi );
3206 tl_assert(te->vts);
3207 return te->vts;
3208}
3209
3210static void VtsID__pp ( VtsID vi ) {
3211 HChar buf[100];
3212 VTS* vts = VtsID__to_VTS(vi);
3213 VTS__show( buf, sizeof(buf)-1, vts );
3214 buf[sizeof(buf)-1] = 0;
3215 VG_(printf)("%s", buf);
3216}
3217
3218/* compute the partial-order (LEQ) relation of vi1 and vi2. */
3219__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00003220static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00003221 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00003222 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00003223 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00003224 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00003225 tl_assert(vi1 != vi2);
3226 ////++
sewardj23f12002009-07-24 08:45:08 +00003227 stats__cmpLEQ_queries++;
3228 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3229 if (cmpLEQ_cache[hash].vi1 == vi1
3230 && cmpLEQ_cache[hash].vi2 == vi2)
3231 return cmpLEQ_cache[hash].leq;
3232 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00003233 ////--
3234 v1 = VtsID__to_VTS(vi1);
3235 v2 = VtsID__to_VTS(vi2);
sewardje4cce742011-02-24 15:25:24 +00003236 leq = VTS__cmpLEQ( v1, v2 ) == 0;
sewardjf98e1c02008-10-25 16:22:41 +00003237 ////++
sewardj23f12002009-07-24 08:45:08 +00003238 cmpLEQ_cache[hash].vi1 = vi1;
3239 cmpLEQ_cache[hash].vi2 = vi2;
3240 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00003241 ////--
sewardj23f12002009-07-24 08:45:08 +00003242 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00003243}
sewardj23f12002009-07-24 08:45:08 +00003244static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3245 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003246}
3247
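/* Illustrative sketch only ('accesses_race' is a hypothetical name):
   cmpLEQ is the ordering test the memory state machine uses to decide
   whether one access happened-before another.  Roughly: */
#if 0
static Bool accesses_race ( VtsID earlierW, VtsID currentR )
{
   /* They race unless the earlier write is ordered at-or-before the
      current read, i.e. unless earlierW <= currentR pointwise. */
   return !VtsID__cmpLEQ( earlierW, currentR );
}
#endif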
3248/* compute binary join */
3249__attribute__((noinline))
3250static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3251 UInt hash;
3252 VtsID res;
sewardj7aa38a92011-02-27 23:04:12 +00003253 VTS *vts1, *vts2;
sewardjf98e1c02008-10-25 16:22:41 +00003254 //if (vi1 == vi2) return vi1;
3255 tl_assert(vi1 != vi2);
3256 ////++
3257 stats__join2_queries++;
3258 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3259 if (join2_cache[hash].vi1 == vi1
3260 && join2_cache[hash].vi2 == vi2)
3261 return join2_cache[hash].res;
3262 stats__join2_misses++;
3263 ////--
3264 vts1 = VtsID__to_VTS(vi1);
3265 vts2 = VtsID__to_VTS(vi2);
sewardj7aa38a92011-02-27 23:04:12 +00003266 temp_max_sized_VTS->usedTS = 0;
3267 VTS__join(temp_max_sized_VTS, vts1,vts2);
3268 res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003269 ////++
3270 join2_cache[hash].vi1 = vi1;
3271 join2_cache[hash].vi2 = vi2;
3272 join2_cache[hash].res = res;
3273 ////--
3274 return res;
3275}
3276static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003277 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00003278}
3279
3280/* create a singleton VTS, namely [thr:tym] */
3281static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
sewardj7aa38a92011-02-27 23:04:12 +00003282 temp_max_sized_VTS->usedTS = 0;
3283 VTS__singleton(temp_max_sized_VTS, thr,tym);
3284 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003285}
3286
3287/* tick operation, creates value 1 if specified index is absent */
3288static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3289 VTS* vts = VtsID__to_VTS(vi);
sewardj7aa38a92011-02-27 23:04:12 +00003290 temp_max_sized_VTS->usedTS = 0;
3291 VTS__tick(temp_max_sized_VTS, idx,vts);
3292 return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
sewardjf98e1c02008-10-25 16:22:41 +00003293}
3294
3295/* index into a VTS (only for assertions) */
3296static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3297 VTS* vts = VtsID__to_VTS(vi);
3298 return VTS__indexAt_SLOW( vts, idx );
3299}
3300
sewardj23f12002009-07-24 08:45:08 +00003301/* Assuming that !cmpLEQ(vi1, vi2), find the Thr* of the first (or
3302 any, really) element in vi1 which is pointwise greater-than the
3303 corresponding element in vi2. Such an element must exist, given
3304 !cmpLEQ, and we assert if none is found. This needs to be fairly
3305 quick since it is called every time a race is detected. */
3306static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3307{
3308 VTS *vts1, *vts2;
sewardje4cce742011-02-24 15:25:24 +00003309 Thr* diffthr;
3310 ThrID diffthrid;
sewardj23f12002009-07-24 08:45:08 +00003311 tl_assert(vi1 != vi2);
3312 vts1 = VtsID__to_VTS(vi1);
3313 vts2 = VtsID__to_VTS(vi2);
3314 tl_assert(vts1 != vts2);
sewardje4cce742011-02-24 15:25:24 +00003315 diffthrid = VTS__cmpLEQ(vts1, vts2);
3316 diffthr = Thr__from_ThrID(diffthrid);
sewardj23f12002009-07-24 08:45:08 +00003317 tl_assert(diffthr); /* else they are LEQ ! */
3318 return diffthr;
3319}
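/* Worked example: if vi1 denotes the VTS [Thr1:3, Thr2:1] and vi2
   denotes [Thr1:2, Thr2:5], then !cmpLEQ(vi1, vi2) holds because of
   Thr1 (3 > 2), and this function returns Thr1. */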
3320
3321
3322/////////////////////////////////////////////////////////
3323// //
3324// Filters //
3325// //
3326/////////////////////////////////////////////////////////
3327
sewardj23f12002009-07-24 08:45:08 +00003328/* Forget everything we know -- clear the filter and let everything
3329 through. This needs to be as fast as possible, since it is called
3330 every time the running thread changes, and every time a thread's
3331 vector clocks change, which can be quite frequent. The obvious
3332 fast way to do this is simply to stuff in tags which we know are
3333 not going to match anything, since they're not aligned to the start
3334 of a line. */
3335static void Filter__clear ( Filter* fi, HChar* who )
3336{
3337 UWord i;
3338 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
3339 for (i = 0; i < FI_NUM_LINES; i += 8) {
3340 fi->tags[i+0] = 1; /* impossible value -- cannot match */
3341 fi->tags[i+1] = 1;
3342 fi->tags[i+2] = 1;
3343 fi->tags[i+3] = 1;
3344 fi->tags[i+4] = 1;
3345 fi->tags[i+5] = 1;
3346 fi->tags[i+6] = 1;
3347 fi->tags[i+7] = 1;
3348 }
3349 tl_assert(i == FI_NUM_LINES);
3350}
3351
3352/* Clearing an arbitrary range in the filter. Unfortunately
3353 we have to do this due to core-supplied new/die-mem events. */
3354
3355static void Filter__clear_1byte ( Filter* fi, Addr a )
3356{
3357 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3358 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3359 FiLine* line = &fi->lines[lineno];
3360 UWord loff = (a - atag) / 8;
3361 UShort mask = 0x3 << (2 * (a & 7));
3362 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3363 if (LIKELY( fi->tags[lineno] == atag )) {
3364 /* hit. clear the bits. */
3365 UShort u16 = line->u16s[loff];
3366 line->u16s[loff] = u16 & ~mask; /* clear them */
3367 } else {
3368 /* miss. The filter doesn't hold this address, so ignore. */
3369 }
3370}
3371
3372static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3373{
3374 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3375 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3376 FiLine* line = &fi->lines[lineno];
3377 UWord loff = (a - atag) / 8;
3378 if (LIKELY( fi->tags[lineno] == atag )) {
3379 line->u16s[loff] = 0;
3380 } else {
3381 /* miss. The filter doesn't hold this address, so ignore. */
3382 }
3383}
3384
3385static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3386{
3387 //VG_(printf)("%lu ", len);
3388 /* slowly do part preceding 8-alignment */
3389 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3390 Filter__clear_1byte( fi, a );
3391 a++;
3392 len--;
3393 }
3394 /* vector loop */
3395 while (len >= 8) {
3396 Filter__clear_8bytes_aligned( fi, a );
3397 a += 8;
3398 len -= 8;
3399 }
3400 /* slowly do tail */
3401 while (UNLIKELY(len > 0)) {
3402 Filter__clear_1byte( fi, a );
3403 a++;
3404 len--;
3405 }
3406}
3407
3408
3409/* ------ Read handlers for the filter. ------ */
3410
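/* Editorial note on the encoding, derivable from the masks used
   below: each FiLine holds one UShort per aligned 8-byte chunk, with
   two bits per byte.  Within a byte's 2-bit pair, the low bit
   (0x1 << (2*i)) means "this thread already wrote byte i" and the
   high bit (0x2 << (2*i)) means "this thread already read byte i".
   Hence 0xAAAA selects all eight R bits, 0xFFFF selects all R and W
   bits, and 0x3 << (2*(a & 7)) selects the pair for a single byte. */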
3411static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3412{
3413 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3414 return False;
3415 {
3416 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3417 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3418 FiLine* line = &fi->lines[lineno];
3419 UWord loff = (a - atag) / 8;
3420 UShort mask = 0xAAAA;
3421 if (LIKELY( fi->tags[lineno] == atag )) {
3422 /* hit. check line and update. */
3423 UShort u16 = line->u16s[loff];
3424 Bool ok = (u16 & mask) == mask; /* all R bits set? */
3425 line->u16s[loff] = u16 | mask; /* set them */
3426 return ok;
3427 } else {
3428 /* miss. nuke existing line and re-use it. */
3429 UWord i;
3430 fi->tags[lineno] = atag;
3431 for (i = 0; i < FI_LINE_SZB / 8; i++)
3432 line->u16s[i] = 0;
3433 line->u16s[loff] = mask;
3434 return False;
3435 }
3436 }
3437}
3438
3439static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3440{
3441 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3442 return False;
3443 {
3444 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3445 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3446 FiLine* line = &fi->lines[lineno];
3447 UWord loff = (a - atag) / 8;
3448 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3449 if (LIKELY( fi->tags[lineno] == atag )) {
3450 /* hit. check line and update. */
3451 UShort u16 = line->u16s[loff];
3452 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
3453 line->u16s[loff] = u16 | mask; /* set them */
3454 return ok;
3455 } else {
3456 /* miss. nuke existing line and re-use it. */
3457 UWord i;
3458 fi->tags[lineno] = atag;
3459 for (i = 0; i < FI_LINE_SZB / 8; i++)
3460 line->u16s[i] = 0;
3461 line->u16s[loff] = mask;
3462 return False;
3463 }
3464 }
3465}
3466
3467static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3468{
3469 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3470 return False;
3471 {
3472 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3473 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3474 FiLine* line = &fi->lines[lineno];
3475 UWord loff = (a - atag) / 8;
3476 UShort mask = 0xA << (2 * (a & 6));
3477 /* mask is A000, 0A00, 00A0 or 000A */
3478 if (LIKELY( fi->tags[lineno] == atag )) {
3479 /* hit. check line and update. */
3480 UShort u16 = line->u16s[loff];
3481 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
3482 line->u16s[loff] = u16 | mask; /* set them */
3483 return ok;
3484 } else {
3485 /* miss. nuke existing line and re-use it. */
3486 UWord i;
3487 fi->tags[lineno] = atag;
3488 for (i = 0; i < FI_LINE_SZB / 8; i++)
3489 line->u16s[i] = 0;
3490 line->u16s[loff] = mask;
3491 return False;
3492 }
3493 }
3494}
3495
3496static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3497{
3498 {
3499 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3500 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3501 FiLine* line = &fi->lines[lineno];
3502 UWord loff = (a - atag) / 8;
3503 UShort mask = 0x2 << (2 * (a & 7));
3504 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3505 if (LIKELY( fi->tags[lineno] == atag )) {
3506 /* hit. check line and update. */
3507 UShort u16 = line->u16s[loff];
3508 Bool ok = (u16 & mask) == mask; /* 1 x R bits set? */
3509 line->u16s[loff] = u16 | mask; /* set them */
3510 return ok;
3511 } else {
3512 /* miss. nuke existing line and re-use it. */
3513 UWord i;
3514 fi->tags[lineno] = atag;
3515 for (i = 0; i < FI_LINE_SZB / 8; i++)
3516 line->u16s[i] = 0;
3517 line->u16s[loff] = mask;
3518 return False;
3519 }
3520 }
3521}
3522
3523
3524/* ------ Write handlers for the filter. ------ */
3525
3526static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3527{
3528 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3529 return False;
3530 {
3531 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3532 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3533 FiLine* line = &fi->lines[lineno];
3534 UWord loff = (a - atag) / 8;
3535 UShort mask = 0xFFFF;
3536 if (LIKELY( fi->tags[lineno] == atag )) {
3537 /* hit. check line and update. */
3538 UShort u16 = line->u16s[loff];
3539 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
3540 line->u16s[loff] = u16 | mask; /* set them */
3541 return ok;
3542 } else {
3543 /* miss. nuke existing line and re-use it. */
3544 UWord i;
3545 fi->tags[lineno] = atag;
3546 for (i = 0; i < FI_LINE_SZB / 8; i++)
3547 line->u16s[i] = 0;
3548 line->u16s[loff] = mask;
3549 return False;
3550 }
3551 }
3552}
3553
3554static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3555{
3556 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3557 return False;
3558 {
3559 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3560 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3561 FiLine* line = &fi->lines[lineno];
3562 UWord loff = (a - atag) / 8;
3563 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3564 if (LIKELY( fi->tags[lineno] == atag )) {
3565 /* hit. check line and update. */
3566 UShort u16 = line->u16s[loff];
3567 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
3568 line->u16s[loff] = u16 | mask; /* set them */
3569 return ok;
3570 } else {
3571 /* miss. nuke existing line and re-use it. */
3572 UWord i;
3573 fi->tags[lineno] = atag;
3574 for (i = 0; i < FI_LINE_SZB / 8; i++)
3575 line->u16s[i] = 0;
3576 line->u16s[loff] = mask;
3577 return False;
3578 }
3579 }
3580}
3581
3582static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3583{
3584 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3585 return False;
3586 {
3587 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3588 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3589 FiLine* line = &fi->lines[lineno];
3590 UWord loff = (a - atag) / 8;
3591 UShort mask = 0xF << (2 * (a & 6));
3592 /* mask is F000, 0F00, 00F0 or 000F */
3593 if (LIKELY( fi->tags[lineno] == atag )) {
3594 /* hit. check line and update. */
3595 UShort u16 = line->u16s[loff];
3596 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
3597 line->u16s[loff] = u16 | mask; /* set them */
3598 return ok;
3599 } else {
3600 /* miss. nuke existing line and re-use it. */
3601 UWord i;
3602 fi->tags[lineno] = atag;
3603 for (i = 0; i < FI_LINE_SZB / 8; i++)
3604 line->u16s[i] = 0;
3605 line->u16s[loff] = mask;
3606 return False;
3607 }
3608 }
3609}
3610
3611static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3612{
3613 {
3614 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
3615 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
3616 FiLine* line = &fi->lines[lineno];
3617 UWord loff = (a - atag) / 8;
3618 UShort mask = 0x3 << (2 * (a & 7));
3619 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3620 if (LIKELY( fi->tags[lineno] == atag )) {
3621 /* hit. check line and update. */
3622 UShort u16 = line->u16s[loff];
3623 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
3624 line->u16s[loff] = u16 | mask; /* set them */
3625 return ok;
3626 } else {
3627 /* miss. nuke existing line and re-use it. */
3628 UWord i;
3629 fi->tags[lineno] = atag;
3630 for (i = 0; i < FI_LINE_SZB / 8; i++)
3631 line->u16s[i] = 0;
3632 line->u16s[loff] = mask;
3633 return False;
3634 }
3635 }
3636}
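/* Usage sketch, illustrative only: the real call sites are in the
   memory state machine later in this file, and 'on_read32' here is a
   hypothetical name.  The intended fast path is: consult the filter
   first, and only run the full state machine on a miss. */
#if 0
static void on_read32 ( Thr* thr, Addr a )
{
   if (LIKELY( Filter__ok_to_skip_crd32( thr->filter, a ) ))
      return; /* this thread has already read these 4 bytes and
                 nothing has intervened, so no shadow-state change
                 is possible */
   /* ... otherwise fall through to the full MSM read handler ... */
}
#endif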
3637
sewardjf98e1c02008-10-25 16:22:41 +00003638
3639/////////////////////////////////////////////////////////
3640// //
3641// Threads //
3642// //
3643/////////////////////////////////////////////////////////
3644
sewardje4cce742011-02-24 15:25:24 +00003645/* Maps ThrID values to their Thr*s (which contain ThrID values that
3646 should point back to the relevant slot in the array). The lowest
3647 numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3648static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3649
3650/* And a counter to dole out ThrID values. For rationale/background,
3651 see comments on definition of ScalarTS (far) above. */
sewardj7aa38a92011-02-27 23:04:12 +00003652static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
sewardje4cce742011-02-24 15:25:24 +00003653
3654static ThrID Thr__to_ThrID ( Thr* thr ) {
3655 return thr->thrid;
3656}
3657static Thr* Thr__from_ThrID ( UInt thrid ) {
3658 Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3659 tl_assert(thr->thrid == thrid);
3660 return thr;
3661}
3662
3663static Thr* Thr__new ( void )
3664{
sewardjf98e1c02008-10-25 16:22:41 +00003665 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3666 thr->viR = VtsID_INVALID;
3667 thr->viW = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00003668 thr->llexit_done = False;
3669 thr->joinedwith_done = False;
sewardj23f12002009-07-24 08:45:08 +00003670 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
sewardj2d2ea2f2009-08-02 10:15:07 +00003671 /* We only really need this at history level 1, but unfortunately
3672 this routine is called before the command line processing is
3673 done (sigh), so we can't rely on HG_(clo_history_level) at this
3674 point. Hence always allocate it. Bah. */
sewardj8ab2c132009-08-02 09:34:35 +00003675 thr->local_Kws_n_stacks
sewardj2d2ea2f2009-08-02 10:15:07 +00003676 = VG_(newXA)( HG_(zalloc),
3677 "libhb.Thr__new.3 (local_Kws_and_stacks)",
sewardj23f12002009-07-24 08:45:08 +00003678 HG_(free), sizeof(ULong_n_EC) );
sewardje4cce742011-02-24 15:25:24 +00003679
3680 /* Add this Thr* <-> ThrID binding to the mapping, and
3681 cross-check */
3682 if (!thrid_to_thr_map) {
3683 thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3684 HG_(free), sizeof(Thr*) );
3685 tl_assert(thrid_to_thr_map);
3686 }
3687
sewardj7aa38a92011-02-27 23:04:12 +00003688 if (thrid_counter >= ThrID_MAX_VALID) {
sewardje4cce742011-02-24 15:25:24 +00003689 /* We're hosed. We have to stop. */
3690 scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3691 }
3692
3693 thr->thrid = thrid_counter++;
3694 Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3695 tl_assert(ix + 1024 == thr->thrid);
3696
sewardjf98e1c02008-10-25 16:22:41 +00003697 return thr;
3698}
3699
sewardj8ab2c132009-08-02 09:34:35 +00003700static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00003701{
3702 Word nPresent;
3703 ULong_n_EC pair;
3704 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00003705
3706 // We only collect this info at history level 1 (approx)
3707 if (HG_(clo_history_level) != 1)
3708 return;
3709
sewardj8ab2c132009-08-02 09:34:35 +00003710 /* This is the scalar Kw for thr. */
3711 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00003712 pair.ec = main_get_EC( thr );
3713 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00003714 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00003715
3716 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00003717 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00003718
3719 /* Throw away old stacks, if necessary. We can't accumulate stuff
3720 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00003721 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3722 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3723 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3724 if (0)
3725 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00003726 thr, pair.ull, pair.ec );
3727 }
3728
3729 if (nPresent > 0) {
3730 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00003731 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3732 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00003733 }
3734
3735 if (nPresent == 0)
3736 pair.ec = NULL;
3737
sewardj8ab2c132009-08-02 09:34:35 +00003738 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00003739
3740 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00003741 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00003742 thr, pair.ull, pair.ec );
3743 if (0)
3744 VG_(pp_ExeContext)(pair.ec);
3745}
3746
3747static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
3748{
3749 if (pair1->ull < pair2->ull) return -1;
3750 if (pair1->ull > pair2->ull) return 1;
3751 return 0;
3752}
3753
sewardjf98e1c02008-10-25 16:22:41 +00003754
3755/////////////////////////////////////////////////////////
3756// //
3757// Shadow Values //
3758// //
3759/////////////////////////////////////////////////////////
3760
3761// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3762// hb_zsm.h. We have to do everything else here.
3763
3764/* SVal is 64 bit unsigned int.
3765
3766 <---------30---------> <---------30--------->
3767 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00003768 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00003769 11 0--------------------0 00 0--------------------0 I: SVal_INVALID
3770
sewardjf98e1c02008-10-25 16:22:41 +00003771*/
3772#define SVAL_TAGMASK (3ULL << 62)
3773
3774static inline Bool SVal__isC ( SVal s ) {
3775 return (0ULL << 62) == (s & SVAL_TAGMASK);
3776}
3777static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3778 //tl_assert(VtsID__is_valid(rmini));
3779 //tl_assert(VtsID__is_valid(wmini));
3780 return (((ULong)rmini) << 32) | ((ULong)wmini);
3781}
3782static inline VtsID SVal__unC_Rmin ( SVal s ) {
3783 tl_assert(SVal__isC(s));
3784 return (VtsID)(s >> 32);
3785}
3786static inline VtsID SVal__unC_Wmin ( SVal s ) {
3787 tl_assert(SVal__isC(s));
3788 return (VtsID)(s & 0xFFFFFFFFULL);
3789}
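/* Round-trip sketch: for any VtsID pair whose top two bits are clear
   (as the layout above guarantees for valid IDs), mkC followed by the
   unC accessors is the identity: */
#if 0
   SVal s = SVal__mkC( rmini, wmini );
   tl_assert( SVal__isC(s) );
   tl_assert( SVal__unC_Rmin(s) == rmini );
   tl_assert( SVal__unC_Wmin(s) == wmini );
#endif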
3790
sewardj23f12002009-07-24 08:45:08 +00003791static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00003792 return (2ULL << 62) == (s & SVAL_TAGMASK);
3793}
sewardj23f12002009-07-24 08:45:08 +00003794static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00003795 return 2ULL << 62;
3796}
3797
3798/* Direct callback from lib_zsm. */
3799static void SVal__rcinc ( SVal s ) {
3800 if (SVal__isC(s)) {
3801 VtsID__rcinc( SVal__unC_Rmin(s) );
3802 VtsID__rcinc( SVal__unC_Wmin(s) );
3803 }
3804}
3805
3806/* Direct callback from lib_zsm. */
3807static void SVal__rcdec ( SVal s ) {
3808 if (SVal__isC(s)) {
3809 VtsID__rcdec( SVal__unC_Rmin(s) );
3810 VtsID__rcdec( SVal__unC_Wmin(s) );
3811 }
3812}
3813
3814
3815/////////////////////////////////////////////////////////
3816// //
sewardjd86e3a22008-12-03 11:39:37 +00003817// A simple group (memory) allocator //
3818// //
3819/////////////////////////////////////////////////////////
3820
3821//////////////// BEGIN general group allocator
3822typedef
3823 struct {
3824 UWord elemSzB; /* element size */
3825 UWord nPerGroup; /* # elems per group */
3826 void* (*alloc)(HChar*, SizeT); /* group allocator */
3827 HChar* cc; /* group allocator's cc */
3828 void (*free)(void*); /* group allocator's free-er (unused) */
3829 /* XArray of void* (pointers to groups). The groups themselves.
3830 Each element is a pointer to a block of size (elemSzB *
3831 nPerGroup) bytes. */
3832 XArray* groups;
3833 /* next free element. Is a pointer to an element in one of the
3834 groups pointed to by .groups. */
3835 void* nextFree;
3836 }
3837 GroupAlloc;
3838
3839static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
3840 UWord elemSzB,
3841 UWord nPerGroup,
3842 void* (*alloc)(HChar*, SizeT),
3843 HChar* cc,
3844 void (*free)(void*) )
3845{
3846 tl_assert(0 == (elemSzB % sizeof(UWord)));
3847 tl_assert(elemSzB >= sizeof(UWord));
3848 tl_assert(nPerGroup >= 100); /* let's say */
3849 tl_assert(alloc);
3850 tl_assert(cc);
3851 tl_assert(free);
3852 tl_assert(ga);
3853 VG_(memset)(ga, 0, sizeof(*ga));
3854 ga->elemSzB = elemSzB;
3855 ga->nPerGroup = nPerGroup;
3856 ga->groups = NULL;
3857 ga->alloc = alloc;
3858 ga->cc = cc;
3859 ga->free = free;
3860 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
3861 ga->nextFree = NULL;
3862 tl_assert(ga->groups);
3863}
3864
3865/* The freelist is empty. Allocate a new group and put all the new
3866 elements in it onto the freelist. */
3867__attribute__((noinline))
3868static void gal_add_new_group ( GroupAlloc* ga )
3869{
3870 Word i;
3871 UWord* group;
3872 tl_assert(ga);
3873 tl_assert(ga->nextFree == NULL);
3874 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
3875 tl_assert(group);
3876 /* extend the freelist through the new group. Place the freelist
3877 pointer in the first word of each element. That's why the
3878 element size must be at least one word. */
3879 for (i = ga->nPerGroup-1; i >= 0; i--) {
3880 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
3881 UWord* elem = (UWord*)elemC;
3882 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
3883 *elem = (UWord)ga->nextFree;
3884 ga->nextFree = elem;
3885 }
3886 /* and add to our collection of groups */
3887 VG_(addToXA)( ga->groups, &group );
3888}
3889
3890inline static void* gal_Alloc ( GroupAlloc* ga )
3891{
3892 UWord* elem;
3893 if (UNLIKELY(ga->nextFree == NULL)) {
3894 gal_add_new_group(ga);
3895 }
3896 elem = ga->nextFree;
3897 ga->nextFree = (void*)*elem;
3898 *elem = 0; /* unnecessary, but just to be on the safe side */
3899 return elem;
3900}
3901
3902inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
3903{
3904 tl_assert(n == ga->elemSzB);
3905 return gal_Alloc( ga );
3906}
3907
3908inline static void gal_Free ( GroupAlloc* ga, void* p )
3909{
3910 UWord* elem = (UWord*)p;
3911 *elem = (UWord)ga->nextFree;
3912 ga->nextFree = elem;
3913}
3914//////////////// END general group allocator
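/* Usage sketch ('Thing' and related names are hypothetical): clients
   embed a GroupAlloc, initialise it once, and then treat
   gal_Alloc/gal_Free as a fixed-size malloc/free, exactly as the RCEC
   and OldRef allocators below do: */
#if 0
static GroupAlloc thing_group_allocator;

static void thing_allocator_init ( void )
{
   init_GroupAlloc( &thing_group_allocator,
                    sizeof(Thing),
                    1000 /* Things per group */,
                    HG_(zalloc), "libhb.thing_groups", HG_(free) );
}

static Thing* alloc_Thing ( void ) {
   return gal_Alloc( &thing_group_allocator );
}
static void free_Thing ( Thing* t ) {
   gal_Free( &thing_group_allocator, t );
}
#endif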
3915
3916
3917/////////////////////////////////////////////////////////
3918// //
sewardjf98e1c02008-10-25 16:22:41 +00003919// Change-event map2 //
3920// //
3921/////////////////////////////////////////////////////////
3922
sewardjf98e1c02008-10-25 16:22:41 +00003923#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3924
3925/* This is in two parts:
3926
sewardj23f12002009-07-24 08:45:08 +00003927 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003928 traces. When the reference count of a stack trace becomes zero,
3929 it is removed from the set and freed up. The intent is to have
3930 a set of stack traces which can be referred to from (2), but to
3931 only represent each one once. The set is indexed/searched by
3932 ordering on the stack trace vectors.
3933
sewardj849b0ed2008-12-21 10:43:10 +00003934 2. A SparseWA of OldRefs. These store information about each old
3935 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003936 location for which the information is recorded. For LRU
3937 purposes, each OldRef also contains a generation number,
3938 indicating when it was most recently accessed.
3939
3940 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003941 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3942 size) triples to RCECs. This allows us to collect the last
3943 access-traceback by up to N_OLDREF_ACCS different triples for
3944 this location. The accs[] array is an MTF (move-to-front) array. If a binding
3945 falls off the end, that's too bad -- we will lose info about
3946 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003947
sewardj849b0ed2008-12-21 10:43:10 +00003948 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003949 whose generation numbers are below some threshold; hence doing
3950 approximate LRU discarding. For each discarded OldRef we must
3951 of course decrement the reference count on the all RCECs it
3952 refers to, in order that entries from (1) eventually get
3953 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003954
3955 A major improvement in reliability of this mechanism would be to
3956 have a dynamically sized OldRef.accs[] array, so no entries ever
3957 fall off the end. In investigations (Dec 08) it appears that a
3958 major cause for the non-availability of conflicting-access traces
3959 in race reports is caused by the fixed size of this array. I
3960 suspect for most OldRefs, only a few entries are used, but for a
3961 minority of cases there is an overflow, leading to info lossage.
3962 Investigations also suggest this is very workload and scheduling
3963 sensitive. Therefore a dynamic sizing would be better.
3964
3965 However, dynamic sizing would defeat the use of a GroupAllocator
3966 for OldRef structures. And that's important for performance. So
3967 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003968*/
3969
3970
3971static UWord stats__ctxt_rcdec1 = 0;
3972static UWord stats__ctxt_rcdec2 = 0;
3973static UWord stats__ctxt_rcdec3 = 0;
3974static UWord stats__ctxt_rcdec_calls = 0;
3975static UWord stats__ctxt_rcdec_discards = 0;
3976static UWord stats__ctxt_rcdec1_eq = 0;
3977
3978static UWord stats__ctxt_tab_curr = 0;
3979static UWord stats__ctxt_tab_max = 0;
3980
3981static UWord stats__ctxt_tab_qs = 0;
3982static UWord stats__ctxt_tab_cmps = 0;
3983
3984
3985///////////////////////////////////////////////////////
sewardj111544a2010-04-12 20:05:24 +00003986//// Part (1): A hash table of RCECs
sewardjf98e1c02008-10-25 16:22:41 +00003987///
3988
3989#define N_FRAMES 8
3990
3991// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3992#define RCEC_MAGIC 0xab88abb2UL
3993
3994//#define N_RCEC_TAB 98317 /* prime */
3995#define N_RCEC_TAB 196613 /* prime */
3996
3997typedef
3998 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003999 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004000 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00004001 UWord rc;
4002 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00004003 UWord frames_hash; /* hash of all the frames */
4004 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00004005 }
4006 RCEC;
4007
4008static RCEC** contextTab = NULL; /* hash table of RCEC*s */
4009
4010
4011/* Gives an arbitrary total order on RCEC .frames fields */
4012static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
4013 Word i;
4014 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
4015 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00004016 if (ec1->frames_hash < ec2->frames_hash) return -1;
4017 if (ec1->frames_hash > ec2->frames_hash) return 1;
4018 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004019 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00004020 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00004021 }
4022 return 0;
4023}
4024
4025
4026/* Dec the ref of this RCEC. */
4027static void ctxt__rcdec ( RCEC* ec )
4028{
4029 stats__ctxt_rcdec_calls++;
4030 tl_assert(ec && ec->magic == RCEC_MAGIC);
4031 tl_assert(ec->rc > 0);
4032 ec->rc--;
4033}
4034
4035static void ctxt__rcinc ( RCEC* ec )
4036{
4037 tl_assert(ec && ec->magic == RCEC_MAGIC);
4038 ec->rc++;
4039}
4040
4041
sewardjd86e3a22008-12-03 11:39:37 +00004042//////////// BEGIN RCEC group allocator
4043static GroupAlloc rcec_group_allocator;
4044
4045static RCEC* alloc_RCEC ( void ) {
4046 return gal_Alloc ( &rcec_group_allocator );
4047}
4048
4049static void free_RCEC ( RCEC* rcec ) {
4050 tl_assert(rcec->magic == RCEC_MAGIC);
4051 gal_Free( &rcec_group_allocator, rcec );
4052}
sewardj111544a2010-04-12 20:05:24 +00004053//////////// END RCEC group allocator
sewardjd86e3a22008-12-03 11:39:37 +00004054
4055
sewardjf98e1c02008-10-25 16:22:41 +00004056/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
4057 move it one step closer to the front of the list, so as to make
4058 subsequent searches for it cheaper. */
4059static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
4060{
4061 RCEC *ec0, *ec1, *ec2;
4062 if (ec == *headp)
4063 tl_assert(0); /* already at head of list */
4064 tl_assert(ec != NULL);
4065 ec0 = *headp;
4066 ec1 = NULL;
4067 ec2 = NULL;
4068 while (True) {
4069 if (ec0 == NULL || ec0 == ec) break;
4070 ec2 = ec1;
4071 ec1 = ec0;
4072 ec0 = ec0->next;
4073 }
4074 tl_assert(ec0 == ec);
4075 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
4076 RCEC* tmp;
4077 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
4078 predecessor. Swap ec0 and ec1, that is, move ec0 one step
4079 closer to the start of the list. */
4080 tl_assert(ec2->next == ec1);
4081 tl_assert(ec1->next == ec0);
4082 tmp = ec0->next;
4083 ec2->next = ec0;
4084 ec0->next = ec1;
4085 ec1->next = tmp;
4086 }
4087 else
4088 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
4089 /* it's second in the list. */
4090 tl_assert(*headp == ec1);
4091 tl_assert(ec1->next == ec0);
4092 ec1->next = ec0->next;
4093 ec0->next = ec1;
4094 *headp = ec0;
4095 }
4096}
4097
4098
4099/* Find the given RCEC in the hash table, and return a pointer to
4100 it. Or, if not present, add the given one (by making a copy of
4101 it, so the caller can immediately deallocate the original) and
4102 return a pointer to the copy. The caller can safely have 'example'
4103 on its stack, since we will always return a pointer to a copy of
4104 it, not to the original. Note that the inserted node will have .rc
4105 of zero and so the caller must immediately increment it. */
4106__attribute__((noinline))
4107static RCEC* ctxt__find_or_add ( RCEC* example )
4108{
4109 UWord hent;
4110 RCEC* copy;
4111 tl_assert(example && example->magic == RCEC_MAGIC);
4112 tl_assert(example->rc == 0);
4113
4114 /* Search the hash table to see if we already have it. */
4115 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00004116 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00004117 copy = contextTab[hent];
4118 while (1) {
4119 if (!copy) break;
4120 tl_assert(copy->magic == RCEC_MAGIC);
4121 stats__ctxt_tab_cmps++;
4122 if (0 == RCEC__cmp_by_frames(copy, example)) break;
4123 copy = copy->next;
4124 }
4125
4126 if (copy) {
4127 tl_assert(copy != example);
4128 /* optimisation: if it's not at the head of its list, move 1
4129 step fwds, to make future searches cheaper */
4130 if (copy != contextTab[hent]) {
4131 move_RCEC_one_step_forward( &contextTab[hent], copy );
4132 }
4133 } else {
sewardjd86e3a22008-12-03 11:39:37 +00004134 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00004135 tl_assert(copy != example);
4136 *copy = *example;
4137 copy->next = contextTab[hent];
4138 contextTab[hent] = copy;
4139 stats__ctxt_tab_curr++;
4140 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4141 stats__ctxt_tab_max = stats__ctxt_tab_curr;
4142 }
4143 return copy;
4144}
4145
4146static inline UWord ROLW ( UWord w, Int n )
4147{
4148 Int bpw = 8 * sizeof(UWord);
4149 w = (w << n) | (w >> (bpw-n));
4150 return w;
4151}
4152
4153__attribute__((noinline))
4154static RCEC* get_RCEC ( Thr* thr )
4155{
4156 UWord hash, i;
4157 RCEC example;
4158 example.magic = RCEC_MAGIC;
4159 example.rc = 0;
4160 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00004161 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00004162 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00004163 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00004164 hash ^= example.frames[i];
4165 hash = ROLW(hash, 19);
4166 }
njn6c83d5e2009-05-05 23:46:24 +00004167 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00004168 return ctxt__find_or_add( &example );
4169}
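/* Note the refcount discipline: get_RCEC returns a node whose .rc it
   has not adjusted (zero if the node was just inserted), so the caller
   must ctxt__rcinc it immediately, and balance that with a ctxt__rcdec
   when the referring structure drops the pointer.  Sketch of the
   idiom, as used by event_map_bind below: */
#if 0
   RCEC* rcec = get_RCEC( thr );
   ctxt__rcinc( rcec );
   /* ... store rcec in an OldRef slot; when that slot is later
      overwritten or the OldRef is discarded, ctxt__rcdec( rcec ) ... */
#endif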
4170
4171///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00004172//// Part (2):
4173/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00004174///
4175
4176// (UInt) `echo "Old Reference Information" | md5sum`
4177#define OldRef_MAGIC 0x30b1f075UL
4178
sewardjffce8152011-06-24 10:09:41 +00004179/* Records an access: a thread (ThrID), a context (RCEC), the access
4180 size & writeness, and the set of W-held locks. The size (1,2,4,8)
4181 is encoded as 00 = 1, 01 = 2, 10 = 4, 11 = 8.
sewardjc5ea9962008-12-07 01:41:46 +00004182*/
sewardjffce8152011-06-24 10:09:41 +00004183typedef
4184 struct {
4185 RCEC* rcec;
4186 WordSetID locksHeldW;
4187 UInt thrid : SCALARTS_N_THRBITS;
4188 UInt szLg2B : 2;
4189 UInt isW : 1;
4190 }
4191 Thr_n_RCEC;
sewardjf98e1c02008-10-25 16:22:41 +00004192
sewardj849b0ed2008-12-21 10:43:10 +00004193#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00004194
4195typedef
4196 struct {
sewardjd86e3a22008-12-03 11:39:37 +00004197 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00004198 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00004199 /* or free list when not in use */
sewardjffce8152011-06-24 10:09:41 +00004200 /* unused slots in this array have .thrid == 0, which is invalid */
sewardjf98e1c02008-10-25 16:22:41 +00004201 Thr_n_RCEC accs[N_OLDREF_ACCS];
4202 }
4203 OldRef;
4204
sewardjd86e3a22008-12-03 11:39:37 +00004205
4206//////////// BEGIN OldRef group allocator
4207static GroupAlloc oldref_group_allocator;
4208
4209static OldRef* alloc_OldRef ( void ) {
4210 return gal_Alloc ( &oldref_group_allocator );
4211}
4212
4213static void free_OldRef ( OldRef* r ) {
4214 tl_assert(r->magic == OldRef_MAGIC);
4215 gal_Free( &oldref_group_allocator, r );
4216}
4217//////////// END OldRef group allocator
4218
sewardjd86e3a22008-12-03 11:39:37 +00004219
sewardjbc307e52008-12-06 22:10:54 +00004220static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
4221static UWord oldrefGen = 0; /* current LRU generation # */
4222static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
4223static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00004224
sewardj1669cc72008-12-13 01:20:21 +00004225inline static UInt min_UInt ( UInt a, UInt b ) {
4226 return a < b ? a : b;
4227}
4228
sewardja781be62008-12-08 00:12:28 +00004229/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
4230 first interval is lower, 1 if the first interval is higher, and 0
4231 if there is any overlap. Redundant paranoia with casting is there
4232 following what looked distinctly like a bug in gcc-4.1.2, in which
4233 some of the comparisons were done signedly instead of
4234 unsignedly. */
4235/* Copied from exp-ptrcheck/sg_main.c */
4236static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4237 Addr a2, SizeT n2 ) {
4238 UWord a1w = (UWord)a1;
4239 UWord n1w = (UWord)n1;
4240 UWord a2w = (UWord)a2;
4241 UWord n2w = (UWord)n2;
4242 tl_assert(n1w > 0 && n2w > 0);
4243 if (a1w + n1w <= a2w) return -1L;
4244 if (a2w + n2w <= a1w) return 1L;
4245 return 0;
4246}
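/* Worked examples: cmp_nonempty_intervals(4,4, 8,4) == -1, since
   [4,8) ends exactly where [8,12) begins (disjoint, first lower);
   cmp_nonempty_intervals(8,4, 4,4) == 1; and
   cmp_nonempty_intervals(4,4, 6,4) == 0, since [4,8) and [6,10)
   overlap. */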
4247
sewardjc5ea9962008-12-07 01:41:46 +00004248static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00004249{
sewardjd86e3a22008-12-03 11:39:37 +00004250 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00004251 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00004252 Word i, j;
4253 UWord keyW, valW;
4254 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004255
sewardjffce8152011-06-24 10:09:41 +00004256 tl_assert(thr);
4257 ThrID thrid = thr->thrid;
4258 tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4259
4260 WordSetID locksHeldW = thr->hgthread->locksetW;
4261
sewardjc5ea9962008-12-07 01:41:46 +00004262 rcec = get_RCEC( thr );
4263 ctxt__rcinc(rcec);
4264
sewardjffce8152011-06-24 10:09:41 +00004265 UInt szLg2B = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004266 switch (szB) {
4267 /* This doesn't look particularly branch-predictor friendly. */
sewardjffce8152011-06-24 10:09:41 +00004268 case 1: szLg2B = 0; break;
4269 case 2: szLg2B = 1; break;
4270 case 4: szLg2B = 2; break;
4271 case 8: szLg2B = 3; break;
sewardjc5ea9962008-12-07 01:41:46 +00004272 default: tl_assert(0);
4273 }
4274
sewardjffce8152011-06-24 10:09:41 +00004275 /* Look in the map to see if we already have a record for this
4276 address. */
sewardjbc307e52008-12-06 22:10:54 +00004277 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00004278
sewardjd86e3a22008-12-03 11:39:37 +00004279 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00004280
4281 /* We already have a record for this address. We now need to
sewardjffce8152011-06-24 10:09:41 +00004282 see if we have a stack trace pertaining to this (thrid, R/W,
sewardj849b0ed2008-12-21 10:43:10 +00004283 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00004284 tl_assert(keyW == a);
4285 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004286 tl_assert(ref->magic == OldRef_MAGIC);
4287
sewardjf98e1c02008-10-25 16:22:41 +00004288 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004289 if (ref->accs[i].thrid != thrid)
sewardj849b0ed2008-12-21 10:43:10 +00004290 continue;
sewardjffce8152011-06-24 10:09:41 +00004291 if (ref->accs[i].szLg2B != szLg2B)
4292 continue;
4293 if (ref->accs[i].isW != (UInt)(isW & 1))
sewardj849b0ed2008-12-21 10:43:10 +00004294 continue;
4295 /* else we have a match, so stop looking. */
4296 break;
sewardjf98e1c02008-10-25 16:22:41 +00004297 }
4298
4299 if (i < N_OLDREF_ACCS) {
sewardjffce8152011-06-24 10:09:41 +00004300 /* thread 'thr' has an entry at index 'i'. Update its RCEC. */
sewardjf98e1c02008-10-25 16:22:41 +00004301 if (i > 0) {
4302 Thr_n_RCEC tmp = ref->accs[i-1];
4303 ref->accs[i-1] = ref->accs[i];
4304 ref->accs[i] = tmp;
4305 i--;
4306 }
sewardjc5ea9962008-12-07 01:41:46 +00004307 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00004308 stats__ctxt_rcdec1++;
sewardjffce8152011-06-24 10:09:41 +00004309 ctxt__rcdec( ref->accs[i].rcec );
4310 tl_assert(ref->accs[i].thrid == thrid);
4311 /* Update the RCEC and the W-held lockset. */
4312 ref->accs[i].rcec = rcec;
4313 ref->accs[i].locksHeldW = locksHeldW;
sewardjf98e1c02008-10-25 16:22:41 +00004314 } else {
sewardjffce8152011-06-24 10:09:41 +00004315 /* No entry for this (thread, R/W, size, nWHeld) quad.
4316 Shuffle all of them down one slot, and put the new entry
4317 at the start of the array. */
4318 if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
sewardjf98e1c02008-10-25 16:22:41 +00004319 /* the last slot is in use. We must dec the rc on the
4320 associated rcec. */
4321 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4322 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00004323 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4324 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjffce8152011-06-24 10:09:41 +00004325 ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
sewardjf98e1c02008-10-25 16:22:41 +00004326 } else {
4327 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4328 }
4329 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4330 ref->accs[j] = ref->accs[j-1];
sewardjffce8152011-06-24 10:09:41 +00004331 ref->accs[0].thrid = thrid;
4332 ref->accs[0].szLg2B = szLg2B;
4333 ref->accs[0].isW = (UInt)(isW & 1);
4334 ref->accs[0].locksHeldW = locksHeldW;
4335 ref->accs[0].rcec = rcec;
4336 /* thrid==0 is used to signify an empty slot, so we can't
4337 add zero thrid (such a ThrID is invalid anyway). */
4338 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
sewardjf98e1c02008-10-25 16:22:41 +00004339 }
4340
4341 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00004342
4343 } else {
4344
4345 /* We don't have a record for this address. Create a new one. */
4346 if (oldrefTreeN >= oldrefGenIncAt) {
4347 oldrefGen++;
4348 oldrefGenIncAt = oldrefTreeN + 50000;
4349 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
4350 oldrefGen, oldrefTreeN );
4351 }
sewardjd86e3a22008-12-03 11:39:37 +00004352
4353 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00004354 ref->magic = OldRef_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00004355 ref->gen = oldrefGen;
4356 ref->accs[0].thrid = thrid;
4357 ref->accs[0].szLg2B = szLg2B;
4358 ref->accs[0].isW = (UInt)(isW & 1);
4359 ref->accs[0].locksHeldW = locksHeldW;
4360 ref->accs[0].rcec = rcec;
4361
4362 /* thrid==0 is used to signify an empty slot, so we can't
4363 add zero thrid (such a ThrID is invalid anyway). */
4364 /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4365
4366 /* Clear out the rest of the entries */
sewardjf98e1c02008-10-25 16:22:41 +00004367 for (j = 1; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004368 ref->accs[j].rcec = NULL;
4369 ref->accs[j].thrid = 0;
4370 ref->accs[j].szLg2B = 0;
4371 ref->accs[j].isW = 0;
4372 ref->accs[j].locksHeldW = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004373 }
sewardjbc307e52008-12-06 22:10:54 +00004374 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00004375 oldrefTreeN++;
4376
4377 }
4378}
4379
4380
sewardjffce8152011-06-24 10:09:41 +00004381/* Extract info from the conflicting-access machinery. */
sewardjc5ea9962008-12-07 01:41:46 +00004382Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
sewardjffce8152011-06-24 10:09:41 +00004383 /*OUT*/Thr** resThr,
4384 /*OUT*/SizeT* resSzB,
4385 /*OUT*/Bool* resIsW,
4386 /*OUT*/WordSetID* locksHeldW,
sewardjc5ea9962008-12-07 01:41:46 +00004387 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00004388{
sewardja781be62008-12-08 00:12:28 +00004389 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00004390 OldRef* ref;
4391 UWord keyW, valW;
4392 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00004393
sewardjffce8152011-06-24 10:09:41 +00004394 ThrID cand_thrid;
4395 RCEC* cand_rcec;
4396 Bool cand_isW;
4397 SizeT cand_szB;
4398 WordSetID cand_locksHeldW;
4399 Addr cand_a;
sewardja781be62008-12-08 00:12:28 +00004400
4401 Addr toCheck[15];
4402 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00004403
4404 tl_assert(thr);
4405 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00004406
sewardjffce8152011-06-24 10:09:41 +00004407 ThrID thrid = thr->thrid;
4408
sewardja781be62008-12-08 00:12:28 +00004409 toCheck[nToCheck++] = a;
4410 for (i = -7; i < (Word)szB; i++) {
4411 if (i != 0)
4412 toCheck[nToCheck++] = a + i;
4413 }
4414 tl_assert(nToCheck <= 15);
4415
4416 /* Now see if we can find a suitable matching event for
4417 any of the addresses in toCheck[0 .. nToCheck-1]. */
4418 for (j = 0; j < nToCheck; j++) {
4419
4420 cand_a = toCheck[j];
4421 // VG_(printf)("test %ld %p\n", j, cand_a);
4422
4423 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
4424 if (!b)
4425 continue;
4426
sewardjd86e3a22008-12-03 11:39:37 +00004427 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00004428 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00004429 tl_assert(ref->magic == OldRef_MAGIC);
sewardjffce8152011-06-24 10:09:41 +00004430 tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
sewardjf98e1c02008-10-25 16:22:41 +00004431
sewardjffce8152011-06-24 10:09:41 +00004432 cand_thrid = 0; /* invalid; see comments in event_map_bind */
4433 cand_rcec = NULL;
4434 cand_isW = False;
4435 cand_szB = 0;
4436 cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
sewardjf98e1c02008-10-25 16:22:41 +00004437
sewardjc5ea9962008-12-07 01:41:46 +00004438 for (i = 0; i < N_OLDREF_ACCS; i++) {
4439 Thr_n_RCEC* cand = &ref->accs[i];
sewardjffce8152011-06-24 10:09:41 +00004440 cand_rcec = cand->rcec;
4441 cand_thrid = cand->thrid;
4442 cand_isW = (Bool)cand->isW;
4443 cand_szB = 1 << cand->szLg2B;
4444 cand_locksHeldW = cand->locksHeldW;
sewardjc5ea9962008-12-07 01:41:46 +00004445
sewardjffce8152011-06-24 10:09:41 +00004446 if (cand_thrid == 0)
sewardjc5ea9962008-12-07 01:41:46 +00004447 /* This slot isn't in use. Ignore it. */
4448 continue;
4449
sewardjffce8152011-06-24 10:09:41 +00004450 if (cand_thrid == thrid)
sewardjc5ea9962008-12-07 01:41:46 +00004451 /* This is an access by the same thread, but we're only
4452 interested in accesses from other threads. Ignore. */
4453 continue;
4454
4455 if ((!cand_isW) && (!isW))
4456 /* We don't want to report a read racing against another
4457 read; that's stupid. So in this case move on. */
4458 continue;
4459
sewardja781be62008-12-08 00:12:28 +00004460 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4461 /* No overlap with the access we're asking about. Ignore. */
4462 continue;
4463
sewardjc5ea9962008-12-07 01:41:46 +00004464 /* We have a match. Stop searching. */
4465 break;
4466 }
4467
4468 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4469
sewardja781be62008-12-08 00:12:28 +00004470 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00004471 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00004472 /* return with success */
sewardjffce8152011-06-24 10:09:41 +00004473 tl_assert(cand_thrid);
sewardja781be62008-12-08 00:12:28 +00004474 tl_assert(cand_rcec);
4475 tl_assert(cand_rcec->magic == RCEC_MAGIC);
4476 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00004477 /* Count how many non-zero frames we have. */
4478 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4479 for (n = 0; n < maxNFrames; n++) {
4480 if (0 == cand_rcec->frames[n]) break;
4481 }
sewardjffce8152011-06-24 10:09:41 +00004482 *resEC = VG_(make_ExeContext_from_StackTrace)
4483 (cand_rcec->frames, n);
4484 *resThr = Thr__from_ThrID(cand_thrid);
4485 *resSzB = cand_szB;
4486 *resIsW = cand_isW;
4487 *locksHeldW = cand_locksHeldW;
sewardja781be62008-12-08 00:12:28 +00004488 return True;
4489 }
sewardjc5ea9962008-12-07 01:41:46 +00004490
sewardja781be62008-12-08 00:12:28 +00004491 /* consider next address in toCheck[] */
4492 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00004493
sewardja781be62008-12-08 00:12:28 +00004494 /* really didn't find anything. */
4495 return False;
sewardjf98e1c02008-10-25 16:22:41 +00004496}
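/* Caller's-eye sketch, illustrative only: the real caller is in the
   Helgrind error-reporting machinery, and the variable names here are
   hypothetical. */
#if 0
   ExeContext* confEC; Thr* confThr; SizeT confSzB; Bool confIsW;
   WordSetID confLocksHeldW;
   if (libhb_event_map_lookup( &confEC, &confThr, &confSzB, &confIsW,
                               &confLocksHeldW, thr, a, szB, isW )) {
      /* confEC is a stack trace of some previous access of size
         confSzB, overlapping [a, a+szB), made by confThr while holding
         confLocksHeldW; attach it to the race report. */
   }
#endif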
4497
4498static void event_map_init ( void )
4499{
4500 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00004501
4502 /* Context (RCEC) group allocator */
4503 init_GroupAlloc ( &rcec_group_allocator,
4504 sizeof(RCEC),
4505 1000 /* RCECs per group */,
4506 HG_(zalloc),
4507 "libhb.event_map_init.1 (RCEC groups)",
4508 HG_(free) );
4509
4510 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00004511 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00004512 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00004513 N_RCEC_TAB * sizeof(RCEC*) );
4514 tl_assert(contextTab);
4515 for (i = 0; i < N_RCEC_TAB; i++)
4516 contextTab[i] = NULL;
4517
sewardjd86e3a22008-12-03 11:39:37 +00004518 /* Oldref group allocator */
4519 init_GroupAlloc ( &oldref_group_allocator,
4520 sizeof(OldRef),
4521 1000 /* OldRefs per group */,
4522 HG_(zalloc),
4523 "libhb.event_map_init.3 (OldRef groups)",
4524 HG_(free) );
4525
sewardjd86e3a22008-12-03 11:39:37 +00004526 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00004527 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00004528 oldrefTree = VG_(newSWA)(
4529 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00004530 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00004531 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00004532 );
4533 tl_assert(oldrefTree);
4534
4535 oldrefGen = 0;
4536 oldrefGenIncAt = 0;
4537 oldrefTreeN = 0;
4538}
4539
4540static void event_map__check_reference_counts ( Bool before )
4541{
4542 RCEC* rcec;
4543 OldRef* oldref;
4544 Word i;
4545 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00004546 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00004547
4548 /* Set the 'check' reference counts to zero. Also, optionally
4549 check that the real reference counts are non-zero. We allow
4550 these to fall to zero before a GC, but the GC must get rid of
4551 all those that are zero, hence none should be zero after a
4552 GC. */
4553 for (i = 0; i < N_RCEC_TAB; i++) {
4554 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4555 nEnts++;
4556 tl_assert(rcec);
4557 tl_assert(rcec->magic == RCEC_MAGIC);
4558 if (!before)
4559 tl_assert(rcec->rc > 0);
4560 rcec->rcX = 0;
4561 }
4562 }
4563
4564 /* check that the stats are sane */
4565 tl_assert(nEnts == stats__ctxt_tab_curr);
4566 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4567
4568 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00004569 VG_(initIterSWA)( oldrefTree );
4570 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004571 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004572 tl_assert(oldref->magic == OldRef_MAGIC);
4573 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjffce8152011-06-24 10:09:41 +00004574 ThrID aThrID = oldref->accs[i].thrid;
4575 RCEC* aRef = oldref->accs[i].rcec;
4576 if (aThrID != 0) {
sewardjc5ea9962008-12-07 01:41:46 +00004577 tl_assert(aRef);
4578 tl_assert(aRef->magic == RCEC_MAGIC);
4579 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00004580 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004581 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00004582 }
4583 }
4584 }
4585
4586 /* compare check ref counts with actual */
4587 for (i = 0; i < N_RCEC_TAB; i++) {
4588 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4589 tl_assert(rcec->rc == rcec->rcX);
4590 }
4591 }
4592}
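/* For example, an RCEC that is referenced from exactly three OldRef
   access slots must finish the scan above with rcX == 3, and the
   final loop then insists that its real rc is also 3. */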
4593
sewardj8fd92d32008-11-20 23:17:01 +00004594__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00004595static void event_map_maybe_GC ( void )
4596{
4597 OldRef* oldref;
4598 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00004599 XArray* refs2del;
4600 Word i, j, n2del;
4601
sewardj8fd92d32008-11-20 23:17:01 +00004602 UWord* genMap = NULL;
4603 UWord genMap_min = 0;
4604 UWord genMap_size = 0;
4605
sewardj849b0ed2008-12-21 10:43:10 +00004606 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00004607 return;
4608
4609 if (0)
4610 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
4611
sewardj849b0ed2008-12-21 10:43:10 +00004612 /* Check for sane command line params. Limit values must match
4613 those in hg_process_cmd_line_option. */
4614 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
sewardjf585e482009-08-16 22:52:29 +00004615 tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
sewardj849b0ed2008-12-21 10:43:10 +00004616
sewardj8f5374e2008-12-07 11:40:17 +00004617 /* Check our counting is sane (expensive) */
4618 if (CHECK_CEM)
4619 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00004620
sewardj8f5374e2008-12-07 11:40:17 +00004621 /* Check the reference counts (expensive) */
4622 if (CHECK_CEM)
4623 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004624
sewardj8fd92d32008-11-20 23:17:01 +00004625 /* Compute the distribution of generation values in the ref tree.
4626 There are likely only to be a few different generation numbers
4627 in the whole tree, but we don't know what they are. Hence use a
4628 dynamically resized array of counters. The array is genMap[0
4629 .. genMap_size-1], where genMap[0] is the count for the
4630 generation number genMap_min, genMap[1] is the count for
4631 genMap_min+1, etc. If a new number is seen outside the range
4632 [genMap_min .. genMap_min + genMap_size - 1] then the array is
4633 copied into a larger array, and genMap_min and genMap_size are
4634 adjusted accordingly. */
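/* A minimal, self-contained sketch of the grow-in-both-directions
   counter map just described, using plain calloc/memcpy in place of
   HG_(zalloc) and VG_(memcpy). It is not part of libhb proper and is
   disabled in the same #if 0 style as the debug toggles at the top
   of this file; the names 'demoMap' and 'demo_index_for' are
   illustrative only. */
#if 0
#include <stdlib.h>
#include <string.h>

static unsigned long* demoMap      = NULL;
static unsigned long  demoMap_min  = 0;
static unsigned long  demoMap_size = 0;

/* Return the index in demoMap holding the count for 'key', extending
   the array downwards or upwards as required.  Mirrors the three
   cases handled below. */
static unsigned long demo_index_for ( unsigned long key )
{
   if (demoMap == NULL) {
      /* first key seen: create a one-slot, zeroed array */
      demoMap_min  = key;
      demoMap_size = 1;
      demoMap      = calloc(1, sizeof(unsigned long));
      return 0;
   }
   if (key >= demoMap_min && key < demoMap_min + demoMap_size) {
      /* common case: 'key' is already mapped */
      return key - demoMap_min;
   }
   if (key < demoMap_min) {
      /* extend downwards: copy the old contents to the upper end */
      unsigned long  more = demoMap_min - key;
      unsigned long* map2 = calloc(demoMap_size + more,
                                   sizeof(unsigned long));
      memcpy(&map2[more], demoMap, demoMap_size * sizeof(unsigned long));
      free(demoMap);
      demoMap = map2;  demoMap_size += more;  demoMap_min -= more;
      return 0;
   } else {
      /* extend upwards: copy the old contents to the lower end */
      unsigned long  more = key - (demoMap_min + demoMap_size) + 1;
      unsigned long* map2 = calloc(demoMap_size + more,
                                   sizeof(unsigned long));
      memcpy(&map2[0], demoMap, demoMap_size * sizeof(unsigned long));
      free(demoMap);
      demoMap = map2;  demoMap_size += more;
      return demoMap_size - 1;
   }
}
/* usage, per ref: demoMap[demo_index_for(oldref->gen)]++; */
#endif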
4635
sewardjf98e1c02008-10-25 16:22:41 +00004636 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00004637
sewardjbc307e52008-12-06 22:10:54 +00004638 VG_(initIterSWA)( oldrefTree );
4639 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00004640
sewardjd86e3a22008-12-03 11:39:37 +00004641 UWord ea, key;
4642 oldref = (OldRef*)valW;
4643 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00004644
4645 /* BEGIN find 'ea', which is the index in genMap holding the
4646 count for generation number 'key'. */
4647 if (UNLIKELY(genMap == NULL)) {
4648 /* deal with the first key to be seen, so that the following
4649 cases don't need to handle the complexity of a NULL count
4650 array. */
4651 genMap_min = key;
4652 genMap_size = 1;
4653 genMap = HG_(zalloc)( "libhb.emmG.1a",
4654 genMap_size * sizeof(UWord) );
4655 ea = 0;
4656 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
4657                            key, genMap_min, genMap_min+genMap_size - 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004658 }
sewardj8fd92d32008-11-20 23:17:01 +00004659 else
4660 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
4661 /* this is the expected (almost-always-happens) case: 'key'
4662 is already mapped in the array. */
4663 ea = key - genMap_min;
4664 }
4665 else
4666 if (key < genMap_min) {
4667 /* 'key' appears before the start of the current array.
4668 Extend the current array by allocating a larger one and
4669 copying the current one to the upper end of it. */
4670 Word more;
4671 UWord* map2;
4672 more = genMap_min - key;
4673 tl_assert(more > 0);
4674 map2 = HG_(zalloc)( "libhb.emmG.1b",
4675 (genMap_size + more) * sizeof(UWord) );
4676 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
4677 HG_(free)( genMap );
4678 genMap = map2;
4679 genMap_size += more;
4680 genMap_min -= more;
4681 ea = 0;
4682 tl_assert(genMap_min == key);
4683 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
4684                            key, genMap_min, genMap_min+genMap_size - 1 );
4685 }
4686 else {
4687 /* 'key' appears after the end of the current array. Extend
4688 the current array by allocating a larger one and copying
4689 the current one to the lower end of it. */
4690 Word more;
4691 UWord* map2;
4692 tl_assert(key >= genMap_min + genMap_size);
4693 more = key - (genMap_min + genMap_size) + 1;
4694 tl_assert(more > 0);
4695 map2 = HG_(zalloc)( "libhb.emmG.1c",
4696 (genMap_size + more) * sizeof(UWord) );
4697 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4698 HG_(free)( genMap );
4699 genMap = map2;
4700 genMap_size += more;
4701         ea = genMap_size - 1;
4702 tl_assert(genMap_min + genMap_size - 1 == key);
4703 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
4704                            key, genMap_min, genMap_min+genMap_size - 1 );
4705 }
4706 /* END find 'ea' from 'key' */
4707
4708 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00004709 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00004710 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00004711 }
4712
sewardj8fd92d32008-11-20 23:17:01 +00004713 tl_assert(genMap);
4714 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00004715
sewardj8fd92d32008-11-20 23:17:01 +00004716 /* Sanity check what we just computed */
4717 { UWord sum = 0;
4718 for (i = 0; i < genMap_size; i++) {
4719 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
4720 i + genMap_min, genMap[i] );
4721 sum += genMap[i];
4722 }
4723 tl_assert(sum == oldrefTreeN);
4724 }
4725
4726 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00004727 retained = oldrefTreeN;
4728 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00004729
4730 for (i = 0; i < genMap_size; i++) {
4731 keyW = i + genMap_min;
4732 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00004733 tl_assert(keyW > 0); /* can't allow a generation # 0 */
4734 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
4735 tl_assert(keyW >= maxGen);
4736 tl_assert(retained >= valW);
4737 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00004738 > (UWord)(HG_(clo_conflict_cache_size)
4739 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00004740 retained -= valW;
4741 maxGen = keyW;
4742 } else {
4743 break;
4744 }
4745 }
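   /* Worked example of this loop, assuming EVENT_MAP_GC_DISCARD_FRACTION
      is 0.5 (as defined earlier in the file) and
      HG_(clo_conflict_cache_size) is 1000000, so the limit is 500000:
      with generations g1, g2, g3 (oldest first) holding 400000, 300000
      and 300000 refs, g1 is discarded (1000000 - 400000 = 600000
      > 500000), but g2 is not (600000 - 300000 = 300000 is not
      > 500000), so the loop breaks with maxGen = g1 and
      retained = 600000. */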
sewardjf98e1c02008-10-25 16:22:41 +00004746
sewardj8fd92d32008-11-20 23:17:01 +00004747 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00004748
sewardj9b1f0fd2008-11-18 23:40:00 +00004749 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00004750
4751 /* Now make up a big list of the oldrefTree entries we want to
4752 delete. We can't simultaneously traverse the tree and delete
4753 stuff from it, so first we need to copy them off somewhere
4754 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00004755 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00004756 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00004757
sewardj9b1f0fd2008-11-18 23:40:00 +00004758 if (retained < oldrefTreeN) {
4759
4760 /* This is the normal (expected) case. We discard any ref whose
4761 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00004762 VG_(initIterSWA)( oldrefTree );
4763 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00004764 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004765 tl_assert(oldref->magic == OldRef_MAGIC);
4766 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00004767 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004768 }
sewardjf98e1c02008-10-25 16:22:41 +00004769 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004770 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004771 VG_(message)(Vg_DebugMsg,
4772 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00004773 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004774 maxGen, retained );
4775 }
4776
4777 } else {
4778
4779 static UInt rand_seed = 0; /* leave as static */
4780
4781 /* Degenerate case: there's only one generation in the entire
4782 tree, so we need to have some other way of deciding which
4783 refs to throw away. Just throw out half of them randomly. */
4784 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00004785 VG_(initIterSWA)( oldrefTree );
4786 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004787 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00004788 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00004789 tl_assert(oldref->magic == OldRef_MAGIC);
4790 n = VG_(random)( &rand_seed );
4791 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00004792 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00004793 retained--;
4794 }
4795 }
sewardj5e2ac3b2009-08-11 10:39:25 +00004796 if (VG_(clo_stats)) {
sewardj9b1f0fd2008-11-18 23:40:00 +00004797 VG_(message)(Vg_DebugMsg,
4798 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00004799 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00004800 retained );
4801 }
4802
sewardjf98e1c02008-10-25 16:22:41 +00004803 }
4804
4805 n2del = VG_(sizeXA)( refs2del );
4806 tl_assert(n2del == (Word)(oldrefTreeN - retained));
4807
4808 if (0) VG_(printf)("%s","deleting entries\n");
4809 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00004810 Bool b;
4811 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00004812 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00004813 tl_assert(b);
4814 tl_assert(keyW == ga2del);
4815 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00004816 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjffce8152011-06-24 10:09:41 +00004817 ThrID aThrID = oldref->accs[j].thrid;
4818 RCEC* aRef = oldref->accs[j].rcec;
sewardjc5ea9962008-12-07 01:41:46 +00004819 if (aRef) {
sewardjffce8152011-06-24 10:09:41 +00004820 tl_assert(aThrID != 0);
sewardjf98e1c02008-10-25 16:22:41 +00004821 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004822 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004823 } else {
sewardjffce8152011-06-24 10:09:41 +00004824 tl_assert(aThrID == 0);
sewardjf98e1c02008-10-25 16:22:41 +00004825 }
4826 }
sewardjd86e3a22008-12-03 11:39:37 +00004827
4828 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004829 }
4830
4831 VG_(deleteXA)( refs2del );
4832
sewardjc5ea9962008-12-07 01:41:46 +00004833 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004834
4835 oldrefTreeN = retained;
4836 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4837
4838 /* Throw away all RCECs with zero reference counts */
4839 for (i = 0; i < N_RCEC_TAB; i++) {
4840 RCEC** pp = &contextTab[i];
4841 RCEC* p = *pp;
4842 while (p) {
4843 if (p->rc == 0) {
4844 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004845 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004846 p = *pp;
4847 tl_assert(stats__ctxt_tab_curr > 0);
4848 stats__ctxt_tab_curr--;
4849 } else {
4850 pp = &p->next;
4851 p = p->next;
4852 }
4853 }
4854 }
4855
sewardj8f5374e2008-12-07 11:40:17 +00004856 /* Check the reference counts (expensive) */
4857 if (CHECK_CEM)
4858 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004859
4860 //if (0)
4861 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4862 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4863
4864}
4865
4866
4867/////////////////////////////////////////////////////////
4868// //
4869// Core MSM //
4870// //
4871/////////////////////////////////////////////////////////
4872
sewardj23f12002009-07-24 08:45:08 +00004873/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4874 Nov 08, and again after [...],
4875 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004876
sewardj23f12002009-07-24 08:45:08 +00004877static ULong stats__msmcread = 0;
4878static ULong stats__msmcread_change = 0;
4879static ULong stats__msmcwrite = 0;
4880static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004881
sewardj8ab2c132009-08-02 09:34:35 +00004882/* Some notes on the H1 history mechanism:
4883
4884 Transition rules are:
4885
4886 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4887 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4888
4889 After any access by a thread T to a location L, L's constraint pair
4890 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4891
4892 After a race by thread T conflicting with some previous access by
4893 some other thread U, for a location with constraint (before
4894 processing the later access) (Cr,Cw), Cw[U] is the segment in
4895 which the previous access lies.
4896
4897 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4898 are compared so as to find out which thread(s) this access
4899 conflicts with. Once that is established, we also require the
4900 pre-update Cw for the location, so we can index into it for those
4901 threads, to get the scalar clock values for the point at which the
4902 former accesses were made. (In fact we only bother to do any of
4903 this for an arbitrarily chosen one of the conflicting threads, as
4904 that's simpler: it avoids flooding the user with vast amounts of
4905 mostly useless information, and the program is wrong anyway if it
4906 contains any races at all -- so we don't really need to show all
4907 conflicting access pairs initially, so long as we show none only
4908 when none exist).
4909
4910 ---
4911
4912 That requires the auxiliary proof that
4913
4914 (Cr `join` Kw)[T] == Kw[T]
4915
4916 Why should that be true? Because for any thread T, Kw[T] >= the
4917 scalar clock value for T known by any other thread. In other
4918 words, because T's value for its own scalar clock is at least as up
4919 to date as the value for it known by any other thread (that is true
4920 for both the R- and W- scalar clocks). Hence no other thread will
4921 be able to feed in a value for that element (indirectly via a
4922 constraint) which will exceed Kw[T], and hence the join cannot
4923 cause that particular element to advance.
4924*/
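/* A concrete instance of the auxiliary claim: suppose thread T's own
   W-clock is Kw = [T:7, U:2]. By the invariant above, any constraint
   Cr carries at most the value for T that some other thread has
   observed, so Cr[T] <= 7; say Cr = [T:3, U:5]. Then
   (Cr `join` Kw)[T] = max(3,7) = 7 = Kw[T]: the join leaves T's own
   entry unchanged, as claimed. */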
4925
sewardjf98e1c02008-10-25 16:22:41 +00004926__attribute__((noinline))
4927static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004928 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004929 VtsID Cfailed,
4930 VtsID Kfailed,
4931 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004932{
sewardjc5ea9962008-12-07 01:41:46 +00004933 /* Call here to report a race. We just hand it onwards to
4934 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004935 error is going to be collected, then, at history_level 2, that
4936 queries the conflicting-event map. The alternative would be to
4937 query it right here. But that causes a lot of pointless queries
4938 for errors which will shortly be discarded as duplicates, and
4939 can become a performance overhead; so we defer the query until
4940 we know the error is not a duplicate. */
4941
4942 /* Stacks for the bounds of the (or one of the) conflicting
4943 segment(s). These are only set at history_level 1. */
4944 ExeContext* hist1_seg_start = NULL;
4945 ExeContext* hist1_seg_end = NULL;
4946 Thread* hist1_conf_thr = NULL;
4947
4948 tl_assert(acc_thr);
sewardj60626642011-03-10 15:14:37 +00004949 tl_assert(acc_thr->hgthread);
4950 tl_assert(acc_thr->hgthread->hbthr == acc_thr);
sewardj23f12002009-07-24 08:45:08 +00004951 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4952
4953 if (HG_(clo_history_level) == 1) {
4954 Bool found;
4955 Word firstIx, lastIx;
4956 ULong_n_EC key;
4957
4958 /* At history_level 1, we must round up the relevant stack-pair
4959 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004960 deferring it is complex; we can't (easily) put Kfailed and
4961 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004962 getting tied up in difficulties with VtsID reference
4963 counting. So just do it now. */
4964 Thr* confThr;
4965 ULong confTym = 0;
4966 /* Which thread are we in conflict with? There may be more than
4967 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4968 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004969 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004970      /* This must exist, since if it were NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004971 conflict (semantics of return value of
4972 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4973 called us, just checked exactly this -- that there was in
4974 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004975 tl_assert(confThr);
4976
4977 /* Get the scalar clock value that the conflicting thread
4978 introduced into the constraint. A careful examination of the
4979 base machine rules shows that this must be the same as the
4980 conflicting thread's scalar clock when it created this
4981 constraint. Hence we know the scalar clock of the
4982 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004983 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004984
4985 /* Using this scalar clock, index into the conflicting thread's
4986 collection of stack traces made each time its vector clock
4987 (hence its scalar clock) changed. This gives the stack
4988 traces at the start and end of the conflicting segment (well,
4989 as per comment just above, of one of the conflicting
4990 segments, if there are more than one). */
4991 key.ull = confTym;
4992 key.ec = NULL;
4993 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004994 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004995 firstIx = lastIx = 0;
4996 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004997 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004998 &key, &firstIx, &lastIx,
4999 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
5000 );
sewardj8ab2c132009-08-02 09:34:35 +00005001 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00005002 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00005003 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00005004 confThr, confTym, found, firstIx, lastIx);
5005 /* We can't indefinitely collect stack traces at VTS
5006 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00005007 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00005008 ones, which in turn means we might fail to find index value
5009 confTym in the array. */
5010 if (found) {
5011 ULong_n_EC *pair_start, *pair_end;
5012 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00005013 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00005014 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005015 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00005016 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00005017 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00005018 lastIx+1 );
5019 /* from properties of VG_(lookupXA) and the comparison fn used: */
5020 tl_assert(pair_start->ull < pair_end->ull);
5021 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00005022 /* Could do a bit better here. It may be that pair_end
5023 doesn't have a stack, but the following entries in the
5024            array have the same scalar Kw and do have a stack. So
5025 we should search a bit further along the array than
5026 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00005027 } else {
sewardjffce8152011-06-24 10:09:41 +00005028 if (!confThr->llexit_done)
sewardj23f12002009-07-24 08:45:08 +00005029 hist1_seg_end = main_get_EC( confThr );
5030 }
5031 // seg_start could be NULL iff this is the first stack in the thread
5032 //if (seg_start) VG_(pp_ExeContext)(seg_start);
5033 //if (seg_end) VG_(pp_ExeContext)(seg_end);
sewardj60626642011-03-10 15:14:37 +00005034 hist1_conf_thr = confThr->hgthread;
sewardj23f12002009-07-24 08:45:08 +00005035 }
5036 }
5037
sewardj60626642011-03-10 15:14:37 +00005038 HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00005039 szB, isWrite,
5040 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00005041}
5042
5043static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00005044 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00005045 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00005046 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
5047 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00005048}
5049
5050
5051/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00005052static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005053 /* The following are only needed for
5054 creating error reports. */
5055 Thr* acc_thr,
5056 Addr acc_addr, SizeT szB )
5057{
5058 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005059 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00005060
5061 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005062 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005063 tl_assert(is_sane_SVal_C(svOld));
5064 }
5065
sewardj1c0ce7a2009-07-01 08:10:49 +00005066 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005067 VtsID tviR = acc_thr->viR;
5068 VtsID tviW = acc_thr->viW;
5069 VtsID rmini = SVal__unC_Rmin(svOld);
5070 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005071 Bool leq = VtsID__cmpLEQ(rmini,tviR);
5072 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005073 /* no race */
5074 /* Note: RWLOCK subtlety: use tviW, not tviR */
5075 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5076 goto out;
5077 } else {
sewardjb0e009d2008-11-19 16:35:15 +00005078 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005079 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5080 tl_assert(leqxx);
5081 // same as in non-race case
5082 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
5083 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005084 rmini, /* Cfailed */
5085 tviR, /* Kfailed */
5086 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005087 goto out;
5088 }
5089 }
5090 if (SVal__isA(svOld)) {
5091 /* reading no-access memory (sigh); leave unchanged */
5092 /* check for no pollution */
5093 tl_assert(svOld == SVal_NOACCESS);
5094 svNew = SVal_NOACCESS;
5095 goto out;
5096 }
sewardj23f12002009-07-24 08:45:08 +00005097 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005098 tl_assert(0);
5099
5100 out:
sewardj8f5374e2008-12-07 11:40:17 +00005101 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005102 tl_assert(is_sane_SVal_C(svNew));
5103 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005104 if (UNLIKELY(svNew != svOld)) {
5105 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005106 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005107 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005108 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005109 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005110 }
5111 }
5112 return svNew;
5113}
5114
5115
5116/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00005117static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00005118 /* The following are only needed for
5119 creating error reports. */
5120 Thr* acc_thr,
5121 Addr acc_addr, SizeT szB )
5122{
5123 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00005124 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00005125
5126 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00005127 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005128 tl_assert(is_sane_SVal_C(svOld));
5129 }
5130
sewardj1c0ce7a2009-07-01 08:10:49 +00005131 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00005132 VtsID tviW = acc_thr->viW;
5133 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00005134 Bool leq = VtsID__cmpLEQ(wmini,tviW);
5135 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00005136 /* no race */
5137 svNew = SVal__mkC( tviW, tviW );
5138 goto out;
5139 } else {
5140 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00005141 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00005142 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5143 tl_assert(leqxx);
5144 // same as in non-race case
5145 // proof: in the non-race case, we have
5146 // rmini <= wmini (invar on constraints)
5147 // tviW <= tviR (invar on thread clocks)
5148 // wmini <= tviW (from run-time check)
5149 // hence from transitivity of <= we have
5150 // rmini <= wmini <= tviW
5151 // and so join(rmini,tviW) == tviW
5152 // and join(wmini,tviW) == tviW
5153 // qed.
5154 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5155 VtsID__join2(wmini, tviW) );
5156 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00005157 wmini, /* Cfailed */
5158 tviW, /* Kfailed */
5159 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00005160 goto out;
5161 }
5162 }
5163 if (SVal__isA(svOld)) {
5164 /* writing no-access memory (sigh); leave unchanged */
5165 /* check for no pollution */
5166 tl_assert(svOld == SVal_NOACCESS);
5167 svNew = SVal_NOACCESS;
5168 goto out;
5169 }
sewardj23f12002009-07-24 08:45:08 +00005170 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00005171 tl_assert(0);
5172
5173 out:
sewardj8f5374e2008-12-07 11:40:17 +00005174 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00005175 tl_assert(is_sane_SVal_C(svNew));
5176 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005177 if (UNLIKELY(svNew != svOld)) {
5178 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00005179 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00005180 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00005181 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00005182 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00005183 }
5184 }
5185 return svNew;
5186}
5187
5188
5189/////////////////////////////////////////////////////////
5190// //
5191// Apply core MSM to specific memory locations //
5192// //
5193/////////////////////////////////////////////////////////
5194
sewardj23f12002009-07-24 08:45:08 +00005195/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005196
sewardj23f12002009-07-24 08:45:08 +00005197static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005198 CacheLine* cl;
5199 UWord cloff, tno, toff;
5200 SVal svOld, svNew;
5201 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005202 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005203 cl = get_cacheline(a);
5204 cloff = get_cacheline_offset(a);
5205 tno = get_treeno(a);
5206 toff = get_tree_offset(a); /* == 0 .. 7 */
5207 descr = cl->descrs[tno];
5208 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5209 SVal* tree = &cl->svals[tno << 3];
5210 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005211 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005212 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5213 }
5214 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005215 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005216 if (CHECK_ZSM)
5217 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005218 cl->svals[cloff] = svNew;
5219}
5220
sewardj23f12002009-07-24 08:45:08 +00005221static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005222 CacheLine* cl;
5223 UWord cloff, tno, toff;
5224 SVal svOld, svNew;
5225 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005226 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005227 cl = get_cacheline(a);
5228 cloff = get_cacheline_offset(a);
5229 tno = get_treeno(a);
5230 toff = get_tree_offset(a); /* == 0 .. 7 */
5231 descr = cl->descrs[tno];
5232 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5233 SVal* tree = &cl->svals[tno << 3];
5234 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005235 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005236 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5237 }
5238 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005239 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005240 if (CHECK_ZSM)
5241 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005242 cl->svals[cloff] = svNew;
5243}
5244
sewardj23f12002009-07-24 08:45:08 +00005245/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005246
sewardj23f12002009-07-24 08:45:08 +00005247static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005248 CacheLine* cl;
5249 UWord cloff, tno, toff;
5250 SVal svOld, svNew;
5251 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005252 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005253 if (UNLIKELY(!aligned16(a))) goto slowcase;
5254 cl = get_cacheline(a);
5255 cloff = get_cacheline_offset(a);
5256 tno = get_treeno(a);
5257 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5258 descr = cl->descrs[tno];
5259 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5260 if (valid_value_is_below_me_16(descr, toff)) {
5261 goto slowcase;
5262 } else {
5263 SVal* tree = &cl->svals[tno << 3];
5264 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5265 }
sewardj8f5374e2008-12-07 11:40:17 +00005266 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005267 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5268 }
5269 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005270 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005271 if (CHECK_ZSM)
5272 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005273 cl->svals[cloff] = svNew;
5274 return;
5275 slowcase: /* misaligned, or must go further down the tree */
5276 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005277 zsm_sapply08__msmcread( thr, a + 0 );
5278 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005279}
5280
sewardj23f12002009-07-24 08:45:08 +00005281static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005282 CacheLine* cl;
5283 UWord cloff, tno, toff;
5284 SVal svOld, svNew;
5285 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005286 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005287 if (UNLIKELY(!aligned16(a))) goto slowcase;
5288 cl = get_cacheline(a);
5289 cloff = get_cacheline_offset(a);
5290 tno = get_treeno(a);
5291 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5292 descr = cl->descrs[tno];
5293 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5294 if (valid_value_is_below_me_16(descr, toff)) {
5295 goto slowcase;
5296 } else {
5297 SVal* tree = &cl->svals[tno << 3];
5298 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5299 }
sewardj8f5374e2008-12-07 11:40:17 +00005300 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005301 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5302 }
5303 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005304 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005305 if (CHECK_ZSM)
5306 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005307 cl->svals[cloff] = svNew;
5308 return;
5309 slowcase: /* misaligned, or must go further down the tree */
5310 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005311 zsm_sapply08__msmcwrite( thr, a + 0 );
5312 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00005313}
5314
sewardj23f12002009-07-24 08:45:08 +00005315/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005316
sewardj23f12002009-07-24 08:45:08 +00005317static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005318 CacheLine* cl;
5319 UWord cloff, tno, toff;
5320 SVal svOld, svNew;
5321 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005322 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005323 if (UNLIKELY(!aligned32(a))) goto slowcase;
5324 cl = get_cacheline(a);
5325 cloff = get_cacheline_offset(a);
5326 tno = get_treeno(a);
5327 toff = get_tree_offset(a); /* == 0 or 4 */
5328 descr = cl->descrs[tno];
5329 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5330 if (valid_value_is_above_me_32(descr, toff)) {
5331 SVal* tree = &cl->svals[tno << 3];
5332 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5333 } else {
5334 goto slowcase;
5335 }
sewardj8f5374e2008-12-07 11:40:17 +00005336 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005337 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5338 }
5339 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005340 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005341 if (CHECK_ZSM)
5342 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005343 cl->svals[cloff] = svNew;
5344 return;
5345 slowcase: /* misaligned, or must go further down the tree */
5346 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005347 zsm_sapply16__msmcread( thr, a + 0 );
5348 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005349}
5350
sewardj23f12002009-07-24 08:45:08 +00005351static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005352 CacheLine* cl;
5353 UWord cloff, tno, toff;
5354 SVal svOld, svNew;
5355 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005356 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005357 if (UNLIKELY(!aligned32(a))) goto slowcase;
5358 cl = get_cacheline(a);
5359 cloff = get_cacheline_offset(a);
5360 tno = get_treeno(a);
5361 toff = get_tree_offset(a); /* == 0 or 4 */
5362 descr = cl->descrs[tno];
5363 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5364 if (valid_value_is_above_me_32(descr, toff)) {
5365 SVal* tree = &cl->svals[tno << 3];
5366 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5367 } else {
5368 goto slowcase;
5369 }
sewardj8f5374e2008-12-07 11:40:17 +00005370 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005371 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5372 }
5373 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005374 svNew = msmcwrite( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005375 if (CHECK_ZSM)
5376 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005377 cl->svals[cloff] = svNew;
5378 return;
5379 slowcase: /* misaligned, or must go further down the tree */
5380 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005381 zsm_sapply16__msmcwrite( thr, a + 0 );
5382 zsm_sapply16__msmcwrite( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00005383}
5384
sewardj23f12002009-07-24 08:45:08 +00005385/*------------- ZSM accesses: 64 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005386
sewardj23f12002009-07-24 08:45:08 +00005387static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005388 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005389 UWord cloff, tno;
5390 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005391 SVal svOld, svNew;
5392 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005393 stats__cline_cread64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005394 if (UNLIKELY(!aligned64(a))) goto slowcase;
5395 cl = get_cacheline(a);
5396 cloff = get_cacheline_offset(a);
5397 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005398 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005399 descr = cl->descrs[tno];
5400 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5401 goto slowcase;
5402 }
5403 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005404 svNew = msmcread( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005405 if (CHECK_ZSM)
5406 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005407 cl->svals[cloff] = svNew;
5408 return;
5409 slowcase: /* misaligned, or must go further down the tree */
5410 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005411 zsm_sapply32__msmcread( thr, a + 0 );
5412 zsm_sapply32__msmcread( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005413}
5414
sewardj23f12002009-07-24 08:45:08 +00005415static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005416 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005417 UWord cloff, tno;
5418 //UWord toff;
sewardjf98e1c02008-10-25 16:22:41 +00005419 SVal svOld, svNew;
5420 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005421 stats__cline_cwrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005422 if (UNLIKELY(!aligned64(a))) goto slowcase;
5423 cl = get_cacheline(a);
5424 cloff = get_cacheline_offset(a);
5425 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005426 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005427 descr = cl->descrs[tno];
5428 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5429 goto slowcase;
5430 }
5431 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00005432 svNew = msmcwrite( svOld, thr,a,8 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005433 if (CHECK_ZSM)
5434 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00005435 cl->svals[cloff] = svNew;
5436 return;
5437 slowcase: /* misaligned, or must go further down the tree */
5438 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005439 zsm_sapply32__msmcwrite( thr, a + 0 );
5440 zsm_sapply32__msmcwrite( thr, a + 4 );
sewardjf98e1c02008-10-25 16:22:41 +00005441}
5442
sewardj23f12002009-07-24 08:45:08 +00005443/*--------------- ZSM accesses: 8 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005444
5445static
sewardj23f12002009-07-24 08:45:08 +00005446void zsm_swrite08 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005447 CacheLine* cl;
5448 UWord cloff, tno, toff;
5449 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005450 stats__cline_swrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005451 cl = get_cacheline(a);
5452 cloff = get_cacheline_offset(a);
5453 tno = get_treeno(a);
5454 toff = get_tree_offset(a); /* == 0 .. 7 */
5455 descr = cl->descrs[tno];
5456 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5457 SVal* tree = &cl->svals[tno << 3];
5458 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005459 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005460 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5461 }
5462 tl_assert(svNew != SVal_INVALID);
5463 cl->svals[cloff] = svNew;
5464}
5465
sewardj23f12002009-07-24 08:45:08 +00005466/*--------------- ZSM accesses: 16 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005467
5468static
sewardj23f12002009-07-24 08:45:08 +00005469void zsm_swrite16 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005470 CacheLine* cl;
5471 UWord cloff, tno, toff;
5472 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005473 stats__cline_swrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00005474 if (UNLIKELY(!aligned16(a))) goto slowcase;
5475 cl = get_cacheline(a);
5476 cloff = get_cacheline_offset(a);
5477 tno = get_treeno(a);
5478 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5479 descr = cl->descrs[tno];
5480 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5481 if (valid_value_is_below_me_16(descr, toff)) {
5482 /* Writing at this level. Need to fix up 'descr'. */
5483 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5484 /* At this point, the tree does not match cl->descr[tno] any
5485 more. The assignments below will fix it up. */
5486 } else {
5487 /* We can't indiscriminately write on the w16 node as in the
5488 w64 case, as that might make the node inconsistent with
5489 its parent. So first, pull down to this level. */
5490 SVal* tree = &cl->svals[tno << 3];
5491 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005492 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005493 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5494 }
5495 }
5496 tl_assert(svNew != SVal_INVALID);
5497 cl->svals[cloff + 0] = svNew;
5498 cl->svals[cloff + 1] = SVal_INVALID;
5499 return;
5500 slowcase: /* misaligned */
5501 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00005502 zsm_swrite08( a + 0, svNew );
5503 zsm_swrite08( a + 1, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005504}
5505
sewardj23f12002009-07-24 08:45:08 +00005506/*--------------- ZSM accesses: 32 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005507
5508static
sewardj23f12002009-07-24 08:45:08 +00005509void zsm_swrite32 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005510 CacheLine* cl;
5511 UWord cloff, tno, toff;
5512 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005513 stats__cline_swrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00005514 if (UNLIKELY(!aligned32(a))) goto slowcase;
5515 cl = get_cacheline(a);
5516 cloff = get_cacheline_offset(a);
5517 tno = get_treeno(a);
5518 toff = get_tree_offset(a); /* == 0 or 4 */
5519 descr = cl->descrs[tno];
5520 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5521 if (valid_value_is_above_me_32(descr, toff)) {
5522 /* We can't indiscriminately write on the w32 node as in the
5523 w64 case, as that might make the node inconsistent with
5524 its parent. So first, pull down to this level. */
5525 SVal* tree = &cl->svals[tno << 3];
5526 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00005527 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00005528 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5529 } else {
5530 /* Writing at this level. Need to fix up 'descr'. */
5531 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5532 /* At this point, the tree does not match cl->descr[tno] any
5533 more. The assignments below will fix it up. */
5534 }
5535 }
5536 tl_assert(svNew != SVal_INVALID);
5537 cl->svals[cloff + 0] = svNew;
5538 cl->svals[cloff + 1] = SVal_INVALID;
5539 cl->svals[cloff + 2] = SVal_INVALID;
5540 cl->svals[cloff + 3] = SVal_INVALID;
5541 return;
5542 slowcase: /* misaligned */
5543 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00005544 zsm_swrite16( a + 0, svNew );
5545 zsm_swrite16( a + 2, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005546}
5547
sewardj23f12002009-07-24 08:45:08 +00005548/*--------------- ZSM accesses: 64 bit swrite --------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005549
5550static
sewardj23f12002009-07-24 08:45:08 +00005551void zsm_swrite64 ( Addr a, SVal svNew ) {
sewardjf98e1c02008-10-25 16:22:41 +00005552 CacheLine* cl;
njn4c245e52009-03-15 23:25:38 +00005553 UWord cloff, tno;
5554 //UWord toff;
sewardj23f12002009-07-24 08:45:08 +00005555 stats__cline_swrite64s++;
sewardjf98e1c02008-10-25 16:22:41 +00005556 if (UNLIKELY(!aligned64(a))) goto slowcase;
5557 cl = get_cacheline(a);
5558 cloff = get_cacheline_offset(a);
5559 tno = get_treeno(a);
njn4c245e52009-03-15 23:25:38 +00005560 //toff = get_tree_offset(a); /* == 0, unused */
sewardjf98e1c02008-10-25 16:22:41 +00005561 cl->descrs[tno] = TREE_DESCR_64;
5562 tl_assert(svNew != SVal_INVALID);
5563 cl->svals[cloff + 0] = svNew;
5564 cl->svals[cloff + 1] = SVal_INVALID;
5565 cl->svals[cloff + 2] = SVal_INVALID;
5566 cl->svals[cloff + 3] = SVal_INVALID;
5567 cl->svals[cloff + 4] = SVal_INVALID;
5568 cl->svals[cloff + 5] = SVal_INVALID;
5569 cl->svals[cloff + 6] = SVal_INVALID;
5570 cl->svals[cloff + 7] = SVal_INVALID;
5571 return;
5572 slowcase: /* misaligned */
5573 stats__cline_64to32splits++;
sewardj23f12002009-07-24 08:45:08 +00005574 zsm_swrite32( a + 0, svNew );
5575 zsm_swrite32( a + 4, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005576}
5577
sewardj23f12002009-07-24 08:45:08 +00005578/*------------- ZSM accesses: 8 bit sread/scopy ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00005579
5580static
sewardj23f12002009-07-24 08:45:08 +00005581SVal zsm_sread08 ( Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00005582 CacheLine* cl;
5583 UWord cloff, tno, toff;
5584 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00005585 stats__cline_sread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00005586 cl = get_cacheline(a);
5587 cloff = get_cacheline_offset(a);
5588 tno = get_treeno(a);
5589 toff = get_tree_offset(a); /* == 0 .. 7 */
5590 descr = cl->descrs[tno];
5591 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5592 SVal* tree = &cl->svals[tno << 3];
5593 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5594 }
5595 return cl->svals[cloff];
5596}
5597
sewardj23f12002009-07-24 08:45:08 +00005598static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
sewardjf98e1c02008-10-25 16:22:41 +00005599 SVal sv;
sewardj23f12002009-07-24 08:45:08 +00005600 stats__cline_scopy08s++;
5601 sv = zsm_sread08( src );
5602 zsm_swrite08( dst, sv );
sewardjf98e1c02008-10-25 16:22:41 +00005603}
5604
5605
sewardj23f12002009-07-24 08:45:08 +00005606/* Block-copy states (needed for implementing realloc()). Note this
5607 doesn't change the filtering arrangements. The caller of
5608 zsm_scopy_range needs to attend to that. */
sewardjf98e1c02008-10-25 16:22:41 +00005609
sewardj23f12002009-07-24 08:45:08 +00005610static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00005611{
5612 SizeT i;
5613 if (len == 0)
5614 return;
5615
5616 /* assert for non-overlappingness */
5617 tl_assert(src+len <= dst || dst+len <= src);
5618
5619 /* To be simple, just copy byte by byte. But so as not to wreck
5620 performance for later accesses to dst[0 .. len-1], normalise
5621 destination lines as we finish with them, and also normalise the
5622 line containing the first and last address. */
5623 for (i = 0; i < len; i++) {
5624 Bool normalise
5625 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5626 || i == 0 /* first in range */
5627 || i == len-1; /* last in range */
sewardj23f12002009-07-24 08:45:08 +00005628 zsm_scopy08( src+i, dst+i, normalise );
sewardjf98e1c02008-10-25 16:22:41 +00005629 }
5630}
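/* Example, assuming N_LINE_ARANGE is 64: copying len = 10 bytes to
   dst = 0x203A sets the 'normalise' flag at i = 0 (first in range),
   at i = 5 (dst+5 = 0x203F, the last byte of its line) and at i = 9
   (last in range). */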
5631
5632
5633/* For setting address ranges to a given value. Has considerable
5634 sophistication so as to avoid generating large numbers of pointless
5635 cache loads/writebacks for large ranges. */
5636
5637/* Do small ranges in-cache, in the obvious way. */
5638static
sewardj23f12002009-07-24 08:45:08 +00005639void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005640{
5641 /* fast track a couple of common cases */
5642 if (len == 4 && aligned32(a)) {
sewardj23f12002009-07-24 08:45:08 +00005643 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005644 return;
5645 }
5646 if (len == 8 && aligned64(a)) {
sewardj23f12002009-07-24 08:45:08 +00005647 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005648 return;
5649 }
5650
5651 /* be completely general (but as efficient as possible) */
5652 if (len == 0) return;
5653
5654 if (!aligned16(a) && len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005655 zsm_swrite08( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005656 a += 1;
5657 len -= 1;
5658 tl_assert(aligned16(a));
5659 }
5660 if (len == 0) return;
5661
5662 if (!aligned32(a) && len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005663 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005664 a += 2;
5665 len -= 2;
5666 tl_assert(aligned32(a));
5667 }
5668 if (len == 0) return;
5669
5670 if (!aligned64(a) && len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005671 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005672 a += 4;
5673 len -= 4;
5674 tl_assert(aligned64(a));
5675 }
5676 if (len == 0) return;
5677
5678 if (len >= 8) {
5679 tl_assert(aligned64(a));
5680 while (len >= 8) {
sewardj23f12002009-07-24 08:45:08 +00005681 zsm_swrite64( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005682 a += 8;
5683 len -= 8;
5684 }
5685 tl_assert(aligned64(a));
5686 }
5687 if (len == 0) return;
5688
5689 if (len >= 4)
5690 tl_assert(aligned32(a));
5691 if (len >= 4) {
sewardj23f12002009-07-24 08:45:08 +00005692 zsm_swrite32( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005693 a += 4;
5694 len -= 4;
5695 }
5696 if (len == 0) return;
5697
5698 if (len >= 2)
5699 tl_assert(aligned16(a));
5700 if (len >= 2) {
sewardj23f12002009-07-24 08:45:08 +00005701 zsm_swrite16( a, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005702 a += 2;
5703 len -= 2;
5704 }
5705 if (len == 0) return;
5706
5707 if (len >= 1) {
sewardj23f12002009-07-24 08:45:08 +00005708 zsm_swrite08( a, svNew );
njn4c245e52009-03-15 23:25:38 +00005709 //a += 1;
sewardjf98e1c02008-10-25 16:22:41 +00005710 len -= 1;
5711 }
5712 tl_assert(len == 0);
5713}
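/* Example decomposition: zsm_sset_range_SMALL(0x1003, 21, sv) issues
   zsm_swrite08 at 0x1003, zsm_swrite32 at 0x1004, and zsm_swrite64 at
   0x1008 and 0x1010 -- 1 + 4 + 8 + 8 = 21 bytes, every piece
   naturally aligned. */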
5714
5715
sewardj23f12002009-07-24 08:45:08 +00005716/* If we're doing a small range, hand off to zsm_sset_range_SMALL. But
sewardjf98e1c02008-10-25 16:22:41 +00005717 for larger ranges, try to operate directly on the out-of-cache
5718 representation, rather than dragging lines into the cache,
5719 overwriting them, and forcing them out. This turns out to be an
sewardj23f12002009-07-24 08:45:08 +00005720 important performance optimisation.
sewardjf98e1c02008-10-25 16:22:41 +00005721
sewardj23f12002009-07-24 08:45:08 +00005722 Note that this doesn't change the filtering arrangements. The
5723 caller of zsm_sset_range needs to attend to that. */
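/* Example split, assuming N_LINE_ARANGE is 64: for a = 0x1010 and
   len = 0x100, before_start = 0x1010 with before_len = 0x30 (done
   in-cache), aligned_start = 0x1040 .. after_start = 0x1100 with
   aligned_len = 0xC0 (three whole lines, handled directly in the Z
   representation), and after_len = 0x10 (done in-cache). */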
5724
5725static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
sewardjf98e1c02008-10-25 16:22:41 +00005726{
5727 tl_assert(svNew != SVal_INVALID);
5728 stats__cache_make_New_arange += (ULong)len;
5729
5730 if (0 && len > 500)
5731 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
5732
5733 if (0) {
5734 static UWord n_New_in_cache = 0;
5735 static UWord n_New_not_in_cache = 0;
5736 /* tag is 'a' with the in-line offset masked out,
5737 eg a[31]..a[4] 0000 */
5738 Addr tag = a & ~(N_LINE_ARANGE - 1);
5739 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5740 if (LIKELY(tag == cache_shmem.tags0[wix])) {
5741 n_New_in_cache++;
5742 } else {
5743 n_New_not_in_cache++;
5744 }
5745 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5746 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5747 n_New_in_cache, n_New_not_in_cache );
5748 }
5749
5750 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
sewardj23f12002009-07-24 08:45:08 +00005751 zsm_sset_range_SMALL( a, len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005752 } else {
5753 Addr before_start = a;
5754 Addr aligned_start = cacheline_ROUNDUP(a);
5755 Addr after_start = cacheline_ROUNDDN(a + len);
5756 UWord before_len = aligned_start - before_start;
5757 UWord aligned_len = after_start - aligned_start;
5758 UWord after_len = a + len - after_start;
5759 tl_assert(before_start <= aligned_start);
5760 tl_assert(aligned_start <= after_start);
5761 tl_assert(before_len < N_LINE_ARANGE);
5762 tl_assert(after_len < N_LINE_ARANGE);
5763 tl_assert(get_cacheline_offset(aligned_start) == 0);
5764 if (get_cacheline_offset(a) == 0) {
5765 tl_assert(before_len == 0);
5766 tl_assert(a == aligned_start);
5767 }
5768 if (get_cacheline_offset(a+len) == 0) {
5769 tl_assert(after_len == 0);
5770 tl_assert(after_start == a+len);
5771 }
5772 if (before_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005773 zsm_sset_range_SMALL( before_start, before_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005774 }
5775 if (after_len > 0) {
sewardj23f12002009-07-24 08:45:08 +00005776 zsm_sset_range_SMALL( after_start, after_len, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005777 }
5778 stats__cache_make_New_inZrep += (ULong)aligned_len;
5779
5780 while (1) {
5781 Addr tag;
5782 UWord wix;
5783 if (aligned_start >= after_start)
5784 break;
5785 tl_assert(get_cacheline_offset(aligned_start) == 0);
5786 tag = aligned_start & ~(N_LINE_ARANGE - 1);
5787 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5788 if (tag == cache_shmem.tags0[wix]) {
5789 UWord i;
5790 for (i = 0; i < N_LINE_ARANGE / 8; i++)
sewardj23f12002009-07-24 08:45:08 +00005791 zsm_swrite64( aligned_start + i * 8, svNew );
sewardjf98e1c02008-10-25 16:22:41 +00005792 } else {
5793 UWord i;
5794 Word zix;
5795 SecMap* sm;
5796 LineZ* lineZ;
5797 /* This line is not in the cache. Do not force it in; instead
5798 modify it in-place. */
5799 /* find the Z line to write in and rcdec it or the
5800 associated F line. */
5801 find_Z_for_writing( &sm, &zix, tag );
5802 tl_assert(sm);
5803 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5804 lineZ = &sm->linesZ[zix];
5805 lineZ->dict[0] = svNew;
5806 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5807 for (i = 0; i < N_LINE_ARANGE/4; i++)
5808 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5809 rcinc_LineZ(lineZ);
5810 }
5811 aligned_start += N_LINE_ARANGE;
5812 aligned_len -= N_LINE_ARANGE;
5813 }
5814 tl_assert(aligned_start == after_start);
5815 tl_assert(aligned_len == 0);
5816 }
5817}
5818
5819
5820/////////////////////////////////////////////////////////
5821// //
sewardj23f12002009-07-24 08:45:08 +00005822// Front-filtering accesses //
5823// //
5824/////////////////////////////////////////////////////////
5825
5826static UWord stats__f_ac = 0;
5827static UWord stats__f_sk = 0;
5828
5829#if 0
5830# define STATS__F_SHOW \
5831 do { \
5832 if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5833 VG_(printf)("filters: ac %lu sk %lu\n", \
5834 stats__f_ac, stats__f_sk); \
5835 } while (0)
5836#else
5837# define STATS__F_SHOW /* */
5838#endif
5839
5840void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5841 stats__f_ac++;
5842 STATS__F_SHOW;
5843 if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5844 stats__f_sk++;
5845 return;
5846 }
5847 zsm_sapply08__msmcwrite(thr, a);
5848}
5849
5850void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5851 stats__f_ac++;
5852 STATS__F_SHOW;
5853 if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5854 stats__f_sk++;
5855 return;
5856 }
5857 zsm_sapply16__msmcwrite(thr, a);
5858}
5859
5860void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5861 stats__f_ac++;
5862 STATS__F_SHOW;
5863 if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5864 stats__f_sk++;
5865 return;
5866 }
5867 zsm_sapply32__msmcwrite(thr, a);
5868}
5869
5870void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5871 stats__f_ac++;
5872 STATS__F_SHOW;
5873 if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5874 stats__f_sk++;
5875 return;
5876 }
5877 zsm_sapply64__msmcwrite(thr, a);
5878}
5879
5880void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5881{
5882 /* fast track a couple of common cases */
5883 if (len == 4 && aligned32(a)) {
5884 zsm_sapply32_f__msmcwrite( thr, a );
5885 return;
5886 }
5887 if (len == 8 && aligned64(a)) {
5888 zsm_sapply64_f__msmcwrite( thr, a );
5889 return;
5890 }
5891
5892 /* be completely general (but as efficient as possible) */
5893 if (len == 0) return;
5894
5895 if (!aligned16(a) && len >= 1) {
5896 zsm_sapply08_f__msmcwrite( thr, a );
5897 a += 1;
5898 len -= 1;
5899 tl_assert(aligned16(a));
5900 }
5901 if (len == 0) return;
5902
5903 if (!aligned32(a) && len >= 2) {
5904 zsm_sapply16_f__msmcwrite( thr, a );
5905 a += 2;
5906 len -= 2;
5907 tl_assert(aligned32(a));
5908 }
5909 if (len == 0) return;
5910
5911 if (!aligned64(a) && len >= 4) {
5912 zsm_sapply32_f__msmcwrite( thr, a );
5913 a += 4;
5914 len -= 4;
5915 tl_assert(aligned64(a));
5916 }
5917 if (len == 0) return;
5918
5919 if (len >= 8) {
5920 tl_assert(aligned64(a));
5921 while (len >= 8) {
5922 zsm_sapply64_f__msmcwrite( thr, a );
5923 a += 8;
5924 len -= 8;
5925 }
5926 tl_assert(aligned64(a));
5927 }
5928 if (len == 0) return;
5929
5930 if (len >= 4)
5931 tl_assert(aligned32(a));
5932 if (len >= 4) {
5933 zsm_sapply32_f__msmcwrite( thr, a );
5934 a += 4;
5935 len -= 4;
5936 }
5937 if (len == 0) return;
5938
5939 if (len >= 2)
5940 tl_assert(aligned16(a));
5941 if (len >= 2) {
5942 zsm_sapply16_f__msmcwrite( thr, a );
5943 a += 2;
5944 len -= 2;
5945 }
5946 if (len == 0) return;
5947
5948 if (len >= 1) {
5949 zsm_sapply08_f__msmcwrite( thr, a );
5950 //a += 1;
5951 len -= 1;
5952 }
5953 tl_assert(len == 0);
5954}
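
/* An illustrative trace of the peeling scheme above (not from the
   original source), for a = 0x1003, len = 23:

      8-bit  at 0x1003   (now 16-aligned; len = 22)
      32-bit at 0x1004   (already 32-aligned, so the 16-bit step is
                          skipped; now 64-aligned; len = 18)
      64-bit at 0x1008 and 0x1010   (len = 2)
      16-bit at 0x1018   (len = 0)

   So an arbitrary 23-byte write costs five filtered accesses rather
   than 23 byte-sized ones.  zsm_sapplyNN_f__msmcread below follows
   exactly the same scheme for reads. */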
5955
5956void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5957 stats__f_ac++;
5958 STATS__F_SHOW;
5959 if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5960 stats__f_sk++;
5961 return;
5962 }
5963 zsm_sapply08__msmcread(thr, a);
5964}
5965
5966void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5967 stats__f_ac++;
5968 STATS__F_SHOW;
5969 if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5970 stats__f_sk++;
5971 return;
5972 }
5973 zsm_sapply16__msmcread(thr, a);
5974}
5975
5976void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5977 stats__f_ac++;
5978 STATS__F_SHOW;
5979 if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5980 stats__f_sk++;
5981 return;
5982 }
5983 zsm_sapply32__msmcread(thr, a);
5984}
5985
5986void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5987 stats__f_ac++;
5988 STATS__F_SHOW;
5989 if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5990 stats__f_sk++;
5991 return;
5992 }
5993 zsm_sapply64__msmcread(thr, a);
5994}
5995
5996void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5997{
5998 /* fast track a couple of common cases */
5999 if (len == 4 && aligned32(a)) {
6000 zsm_sapply32_f__msmcread( thr, a );
6001 return;
6002 }
6003 if (len == 8 && aligned64(a)) {
6004 zsm_sapply64_f__msmcread( thr, a );
6005 return;
6006 }
6007
6008 /* be completely general (but as efficient as possible) */
6009 if (len == 0) return;
6010
6011 if (!aligned16(a) && len >= 1) {
6012 zsm_sapply08_f__msmcread( thr, a );
6013 a += 1;
6014 len -= 1;
6015 tl_assert(aligned16(a));
6016 }
6017 if (len == 0) return;
6018
6019 if (!aligned32(a) && len >= 2) {
6020 zsm_sapply16_f__msmcread( thr, a );
6021 a += 2;
6022 len -= 2;
6023 tl_assert(aligned32(a));
6024 }
6025 if (len == 0) return;
6026
6027 if (!aligned64(a) && len >= 4) {
6028 zsm_sapply32_f__msmcread( thr, a );
6029 a += 4;
6030 len -= 4;
6031 tl_assert(aligned64(a));
6032 }
6033 if (len == 0) return;
6034
6035 if (len >= 8) {
6036 tl_assert(aligned64(a));
6037 while (len >= 8) {
6038 zsm_sapply64_f__msmcread( thr, a );
6039 a += 8;
6040 len -= 8;
6041 }
6042 tl_assert(aligned64(a));
6043 }
6044 if (len == 0) return;
6045
6046 if (len >= 4)
6047 tl_assert(aligned32(a));
6048 if (len >= 4) {
6049 zsm_sapply32_f__msmcread( thr, a );
6050 a += 4;
6051 len -= 4;
6052 }
6053 if (len == 0) return;
6054
6055 if (len >= 2)
6056 tl_assert(aligned16(a));
6057 if (len >= 2) {
6058 zsm_sapply16_f__msmcread( thr, a );
6059 a += 2;
6060 len -= 2;
6061 }
6062 if (len == 0) return;
6063
6064 if (len >= 1) {
6065 zsm_sapply08_f__msmcread( thr, a );
6066 //a += 1;
6067 len -= 1;
6068 }
6069 tl_assert(len == 0);
6070}
6071
6072void libhb_Thr_resumes ( Thr* thr )
6073{
6074 if (0) VG_(printf)("resume %p\n", thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006075 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006076 tl_assert(!thr->llexit_done);
sewardj23f12002009-07-24 08:45:08 +00006077 Filter__clear(thr->filter, "libhb_Thr_resumes");
6078 /* A kludge, but .. if this thread doesn't have any marker stacks
6079 at all, get one right now. This is easier than figuring out
6080 exactly when at thread startup we can and can't take a stack
6081 snapshot. */
sewardj2d2ea2f2009-08-02 10:15:07 +00006082 if (HG_(clo_history_level) == 1) {
6083 tl_assert(thr->local_Kws_n_stacks);
6084 if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
6085 note_local_Kw_n_stack_for(thr);
6086 }
sewardj23f12002009-07-24 08:45:08 +00006087}
6088
6089
6090/////////////////////////////////////////////////////////
6091// //
sewardjf98e1c02008-10-25 16:22:41 +00006092// Synchronisation objects //
6093// //
6094/////////////////////////////////////////////////////////
6095
sewardjffce8152011-06-24 10:09:41 +00006096/* A doubly linked list of all the SOs. */
6097SO* admin_SO = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006098
sewardjffce8152011-06-24 10:09:41 +00006099static SO* SO__Alloc ( void )
6100{
sewardjf98e1c02008-10-25 16:22:41 +00006101 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6102 so->viR = VtsID_INVALID;
6103 so->viW = VtsID_INVALID;
6104 so->magic = SO_MAGIC;
sewardjffce8152011-06-24 10:09:41 +00006105 /* Add to doubly linked list */
6106 if (admin_SO) {
6107 tl_assert(admin_SO->admin_prev == NULL);
6108 admin_SO->admin_prev = so;
6109 so->admin_next = admin_SO;
6110 } else {
6111 so->admin_next = NULL;
6112 }
6113 so->admin_prev = NULL;
6114 admin_SO = so;
6115 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006116 return so;
6117}
sewardjffce8152011-06-24 10:09:41 +00006118
6119static void SO__Dealloc ( SO* so )
6120{
sewardjf98e1c02008-10-25 16:22:41 +00006121 tl_assert(so);
6122 tl_assert(so->magic == SO_MAGIC);
6123 if (so->viR == VtsID_INVALID) {
6124 tl_assert(so->viW == VtsID_INVALID);
6125 } else {
6126 tl_assert(so->viW != VtsID_INVALID);
6127 VtsID__rcdec(so->viR);
6128 VtsID__rcdec(so->viW);
6129 }
6130 so->magic = 0;
sewardjffce8152011-06-24 10:09:41 +00006131 /* Delete from doubly linked list */
6132 if (so->admin_prev)
6133 so->admin_prev->admin_next = so->admin_next;
6134 if (so->admin_next)
6135 so->admin_next->admin_prev = so->admin_prev;
6136 if (so == admin_SO)
6137 admin_SO = so->admin_next;
6138 /* */
sewardjf98e1c02008-10-25 16:22:41 +00006139 HG_(free)( so );
6140}
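
/* A minimal sketch (not part of the original source; count_live_SOs
   is a hypothetical helper, not part of the API) of how the admin_SO
   list can be walked.  The point of keeping the list is that every
   live SO stays reachable, e.g. for sanity checks or statistics: */
#if 0
static UWord count_live_SOs ( void )
{
   UWord n  = 0;
   SO*   so = admin_SO;
   while (so) {
      tl_assert(so->magic == SO_MAGIC);
      n++;
      so = so->admin_next;
   }
   return n;
}
#endif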
6141
6142
6143/////////////////////////////////////////////////////////
6144// //
6145// Top Level API //
6146// //
6147/////////////////////////////////////////////////////////
6148
6149static void show_thread_state ( HChar* str, Thr* t )
6150{
6151 if (1) return;
6152 if (t->viR == t->viW) {
6153 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6154 VtsID__pp( t->viR );
6155 VG_(printf)("%s","\n");
6156 } else {
6157 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6158 VtsID__pp( t->viR );
6159 VG_(printf)(" viW %u==", t->viW);
6160 VtsID__pp( t->viW );
6161 VG_(printf)("%s","\n");
6162 }
6163}
6164
6165
6166Thr* libhb_init (
6167 void (*get_stacktrace)( Thr*, Addr*, UWord ),
sewardjd52392d2008-11-08 20:36:26 +00006168 ExeContext* (*get_EC)( Thr* )
sewardjf98e1c02008-10-25 16:22:41 +00006169 )
6170{
6171 Thr* thr;
6172 VtsID vi;
sewardje4cce742011-02-24 15:25:24 +00006173
 6174 // We will have to store a large number of these,
6175 // so make sure they're the size we expect them to be.
6176 tl_assert(sizeof(ScalarTS) == 8);
sewardjffce8152011-06-24 10:09:41 +00006177
 6178 /* because the first 1024 are unusable */
6179 tl_assert(SCALARTS_N_THRBITS >= 11);
6180 /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6181 Thr_n_RCEC). */
6182 tl_assert(SCALARTS_N_THRBITS <= 29);
6183
6184 /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6185 (32-bit). It's not correctness-critical, but there are a lot of
6186 them, so it's important from a space viewpoint. Unfortunately
6187 we simply can't pack it into 2 words on a 32-bit target. */
6188 if (sizeof(UWord) == 8) {
6189 tl_assert(sizeof(Thr_n_RCEC) == 16);
6190 } else {
6191 tl_assert(sizeof(Thr_n_RCEC) == 12);
6192 }
6193
6194 /* Word sets really are 32 bits. Even on a 64 bit target. */
6195 tl_assert(sizeof(WordSetID) == 4);
6196 tl_assert(sizeof(WordSet) == sizeof(WordSetID));
sewardje4cce742011-02-24 15:25:24 +00006197
sewardjf98e1c02008-10-25 16:22:41 +00006198 tl_assert(get_stacktrace);
sewardjf98e1c02008-10-25 16:22:41 +00006199 tl_assert(get_EC);
6200 main_get_stacktrace = get_stacktrace;
sewardjf98e1c02008-10-25 16:22:41 +00006201 main_get_EC = get_EC;
6202
6203 // No need to initialise hg_wordfm.
6204 // No need to initialise hg_wordset.
6205
sewardj7aa38a92011-02-27 23:04:12 +00006206 /* Allocated once and never deallocated. Used as a temporary in
6207 VTS singleton, tick and join operations. */
6208 temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6209 temp_max_sized_VTS->id = VtsID_INVALID;
sewardjffce8152011-06-24 10:09:41 +00006210 verydead_thread_table_init();
sewardjf98e1c02008-10-25 16:22:41 +00006211 vts_set_init();
6212 vts_tab_init();
6213 event_map_init();
6214 VtsID__invalidate_caches();
6215
6216 // initialise shadow memory
6217 zsm_init( SVal__rcinc, SVal__rcdec );
6218
6219 thr = Thr__new();
6220 vi = VtsID__mk_Singleton( thr, 1 );
6221 thr->viR = vi;
6222 thr->viW = vi;
6223 VtsID__rcinc(thr->viR);
6224 VtsID__rcinc(thr->viW);
6225
6226 show_thread_state(" root", thr);
6227 return thr;
6228}
6229
sewardj23f12002009-07-24 08:45:08 +00006230
sewardjf98e1c02008-10-25 16:22:41 +00006231Thr* libhb_create ( Thr* parent )
6232{
6233 /* The child's VTSs are copies of the parent's VTSs, but ticked at
6234 the child's index. Since the child's index is guaranteed
6235 unique, it has never been seen before, so the implicit value
6236 before the tick is zero and after that is one. */
6237 Thr* child = Thr__new();
6238
6239 child->viR = VtsID__tick( parent->viR, child );
6240 child->viW = VtsID__tick( parent->viW, child );
sewardj23f12002009-07-24 08:45:08 +00006241 Filter__clear(child->filter, "libhb_create(child)");
sewardjf98e1c02008-10-25 16:22:41 +00006242 VtsID__rcinc(child->viR);
6243 VtsID__rcinc(child->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006244 /* We need to do note_local_Kw_n_stack_for( child ), but it's too
sewardj23f12002009-07-24 08:45:08 +00006245 early for that - it may not have a valid TId yet. So, let
6246 libhb_Thr_resumes pick it up the first time the thread runs. */
sewardjf98e1c02008-10-25 16:22:41 +00006247
6248 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6249 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6250
6251 /* and the parent has to move along too */
6252 VtsID__rcdec(parent->viR);
6253 VtsID__rcdec(parent->viW);
6254 parent->viR = VtsID__tick( parent->viR, parent );
6255 parent->viW = VtsID__tick( parent->viW, parent );
sewardj23f12002009-07-24 08:45:08 +00006256 Filter__clear(parent->filter, "libhb_create(parent)");
sewardjf98e1c02008-10-25 16:22:41 +00006257 VtsID__rcinc(parent->viR);
6258 VtsID__rcinc(parent->viW);
sewardj8ab2c132009-08-02 09:34:35 +00006259 note_local_Kw_n_stack_for( parent );
sewardjf98e1c02008-10-25 16:22:41 +00006260
6261 show_thread_state(" child", child);
6262 show_thread_state("parent", parent);
6263
6264 return child;
6265}
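
/* A worked example of the tick logic above (not from the original
   source).  If the parent P's clocks are {P:5, Q:2} at the fork point,
   then after libhb_create:

      child->viR  == child->viW  == {P:5, Q:2, C:1}
      parent->viR == parent->viW == {P:6, Q:2}

   so everything the parent did before the fork happens-before the
   child's first access, while accesses the parent makes afterwards
   (at P:6 and later) are unordered with respect to the child. */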
6266
6267/* Shut down the library, and print stats (in fact that's _all_
 6268 this is for). */
6269void libhb_shutdown ( Bool show_stats )
6270{
6271 if (show_stats) {
6272 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6273 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6274 stats__secmaps_allocd,
6275 stats__secmap_ga_space_covered);
6276 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6277 stats__secmap_linesZ_allocd,
6278 stats__secmap_linesZ_bytes);
6279 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6280 stats__secmap_linesF_allocd,
6281 stats__secmap_linesF_bytes);
6282 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
6283 stats__secmap_iterator_steppings);
6284 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6285 stats__secmaps_search, stats__secmaps_search_slow);
6286
6287 VG_(printf)("%s","\n");
6288 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
6289 stats__cache_totrefs, stats__cache_totmisses );
6290 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
6291 stats__cache_Z_fetches, stats__cache_F_fetches );
6292 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
6293 stats__cache_Z_wbacks, stats__cache_F_wbacks );
6294 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
6295 stats__cache_invals, stats__cache_flushes );
6296 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
6297 stats__cache_make_New_arange,
6298 stats__cache_make_New_inZrep);
6299
6300 VG_(printf)("%s","\n");
6301 VG_(printf)(" cline: %'10lu normalises\n",
6302 stats__cline_normalises );
sewardj23f12002009-07-24 08:45:08 +00006303 VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6304 stats__cline_cread64s,
6305 stats__cline_cread32s,
6306 stats__cline_cread16s,
6307 stats__cline_cread08s );
6308 VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6309 stats__cline_cwrite64s,
6310 stats__cline_cwrite32s,
6311 stats__cline_cwrite16s,
6312 stats__cline_cwrite08s );
6313 VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6314 stats__cline_swrite64s,
6315 stats__cline_swrite32s,
6316 stats__cline_swrite16s,
6317 stats__cline_swrite08s );
6318 VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
6319 stats__cline_sread08s, stats__cline_scopy08s );
sewardjf98e1c02008-10-25 16:22:41 +00006320 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6321 stats__cline_64to32splits,
6322 stats__cline_32to16splits,
6323 stats__cline_16to8splits );
6324 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
6325 stats__cline_64to32pulldown,
6326 stats__cline_32to16pulldown,
6327 stats__cline_16to8pulldown );
6328 if (0)
6329 VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
6330 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
6331
6332 VG_(printf)("%s","\n");
6333
sewardjc8028ad2010-05-05 09:34:42 +00006334 VG_(printf)(" libhb: %'13llu msmcread (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006335 stats__msmcread, stats__msmcread_change);
sewardjc8028ad2010-05-05 09:34:42 +00006336 VG_(printf)(" libhb: %'13llu msmcwrite (%'llu dragovers)\n",
sewardj23f12002009-07-24 08:45:08 +00006337 stats__msmcwrite, stats__msmcwrite_change);
6338 VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6339 stats__cmpLEQ_queries, stats__cmpLEQ_misses);
sewardjf98e1c02008-10-25 16:22:41 +00006340 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
6341 stats__join2_queries, stats__join2_misses);
6342
6343 VG_(printf)("%s","\n");
sewardjc8028ad2010-05-05 09:34:42 +00006344 VG_(printf)( " libhb: VTSops: tick %'lu, join %'lu, cmpLEQ %'lu\n",
6345 stats__vts__tick, stats__vts__join, stats__vts__cmpLEQ );
6346 VG_(printf)( " libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6347 stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
sewardj7aa38a92011-02-27 23:04:12 +00006348 VG_(printf)( " libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
6349 stats__vts_set__focaa, stats__vts_set__focaa_a );
sewardjc8028ad2010-05-05 09:34:42 +00006350 VG_(printf)( " libhb: VTSops: indexAt_SLOW %'lu\n",
6351 stats__vts__indexat_slow );
6352
6353 VG_(printf)("%s","\n");
sewardjf98e1c02008-10-25 16:22:41 +00006354 VG_(printf)(
6355 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6356 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6357 );
6358 VG_(printf)( " libhb: %lu entries in vts_set\n",
6359 VG_(sizeFM)( vts_set ) );
6360
6361 VG_(printf)("%s","\n");
6362 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6363 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6364 stats__ctxt_rcdec2,
6365 stats__ctxt_rcdec3 );
6366 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6367 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
6368 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
6369 (UWord)N_RCEC_TAB,
6370 stats__ctxt_tab_curr );
6371 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
6372 stats__ctxt_tab_qs,
6373 stats__ctxt_tab_cmps );
6374#if 0
6375 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
6376 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
6377 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
6378 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
6379 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
6380 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
6381 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
6382 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
6383 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
6384 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6385 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
6386 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
6387 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
6388 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
6389
6390 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
6391 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
6392 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
6393 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
6394#endif
6395
6396 VG_(printf)("%s","<<< END libhb stats >>>\n");
6397 VG_(printf)("%s","\n");
6398
6399 }
6400}
6401
sewardjffce8152011-06-24 10:09:41 +00006402/* Receive notification that a thread has low-level exited. The
6403 significance here is that we do not expect to see any more memory
6404 references from it. */
sewardjf98e1c02008-10-25 16:22:41 +00006405void libhb_async_exit ( Thr* thr )
6406{
sewardj23f12002009-07-24 08:45:08 +00006407 tl_assert(thr);
sewardjffce8152011-06-24 10:09:41 +00006408 tl_assert(!thr->llexit_done);
6409 thr->llexit_done = True;
sewardj2d2ea2f2009-08-02 10:15:07 +00006410
6411 /* free up Filter and local_Kws_n_stacks (well, actually not the
6412 latter ..) */
6413 tl_assert(thr->filter);
6414 HG_(free)(thr->filter);
6415 thr->filter = NULL;
6416
sewardjffce8152011-06-24 10:09:41 +00006417 /* Tell the VTS mechanism this thread has exited, so it can
6418 participate in VTS pruning. Note this can only happen if the
6419 thread has both ll_exited and has been joined with. */
6420 if (thr->joinedwith_done)
6421 VTS__declare_thread_very_dead(thr);
6422
sewardj2d2ea2f2009-08-02 10:15:07 +00006423 /* Another space-accuracy tradeoff. Do we want to be able to show
6424 H1 history for conflicts in threads which have since exited? If
6425 yes, then we better not free up thr->local_Kws_n_stacks. The
6426 downside is a potential per-thread leak of up to
6427 N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6428 XArray average overcommit factor is (1.5 I'd guess). */
6429 // hence:
6430 // VG_(deleteXA)(thr->local_Kws_n_stacks);
6431 // thr->local_Kws_n_stacks = NULL;
sewardjf98e1c02008-10-25 16:22:41 +00006432}
6433
sewardjffce8152011-06-24 10:09:41 +00006434/* Receive notification that a thread has been joined with. The
6435 significance here is that we do not expect to see any further
6436 references to its vector clocks (Thr::viR and Thr::viW). */
6437void libhb_joinedwith_done ( Thr* thr )
6438{
6439 tl_assert(thr);
6440 /* Caller must ensure that this is only ever called once per Thr. */
6441 tl_assert(!thr->joinedwith_done);
6442 thr->joinedwith_done = True;
6443 if (thr->llexit_done)
6444 VTS__declare_thread_very_dead(thr);
6445}
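
/* The two notifications, libhb_async_exit and libhb_joinedwith_done,
   can arrive in either order; schematically (not from the original
   source):

      libhb_async_exit(t);       // t->llexit_done = True
      libhb_joinedwith_done(t);  // second event: t declared very dead

   or equally

      libhb_joinedwith_done(t);  // t->joinedwith_done = True
      libhb_async_exit(t);       // second event: t declared very dead

   Either way VTS__declare_thread_very_dead(t) runs exactly once, at
   the point where neither further memory accesses by t nor further
   references to t's clocks can occur -- which is what makes t
   eligible for VTS pruning. */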
6446
6447
sewardjf98e1c02008-10-25 16:22:41 +00006448/* Both Segs and SOs point to VTSs. However, there is no sharing, so
6449 a Seg that points at a VTS is its one-and-only owner, and ditto for
6450 a SO that points at a VTS. */
6451
6452SO* libhb_so_alloc ( void )
6453{
6454 return SO__Alloc();
6455}
6456
6457void libhb_so_dealloc ( SO* so )
6458{
6459 tl_assert(so);
6460 tl_assert(so->magic == SO_MAGIC);
6461 SO__Dealloc(so);
6462}
6463
6464/* See comments in libhb.h for details on the meaning of
6465 strong vs weak sends and strong vs weak receives. */
6466void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6467{
6468 /* Copy the VTSs from 'thr' into the sync object, and then move
6469 the thread along one step. */
6470
6471 tl_assert(so);
6472 tl_assert(so->magic == SO_MAGIC);
6473
6474 /* stay sane .. a thread's read-clock must always lead or be the
6475 same as its write-clock */
sewardj23f12002009-07-24 08:45:08 +00006476 { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6477 tl_assert(leq);
sewardjf98e1c02008-10-25 16:22:41 +00006478 }
6479
6480 /* since we're overwriting the VtsIDs in the SO, we need to drop
6481 any references made by the previous contents thereof */
6482 if (so->viR == VtsID_INVALID) {
6483 tl_assert(so->viW == VtsID_INVALID);
6484 so->viR = thr->viR;
6485 so->viW = thr->viW;
6486 VtsID__rcinc(so->viR);
6487 VtsID__rcinc(so->viW);
6488 } else {
6489 /* In a strong send, we dump any previous VC in the SO and
6490 install the sending thread's VC instead. For a weak send we
6491 must join2 with what's already there. */
6492 tl_assert(so->viW != VtsID_INVALID);
6493 VtsID__rcdec(so->viR);
6494 VtsID__rcdec(so->viW);
6495 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6496 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6497 VtsID__rcinc(so->viR);
6498 VtsID__rcinc(so->viW);
6499 }
6500
6501 /* move both parent clocks along */
6502 VtsID__rcdec(thr->viR);
6503 VtsID__rcdec(thr->viW);
6504 thr->viR = VtsID__tick( thr->viR, thr );
6505 thr->viW = VtsID__tick( thr->viW, thr );
sewardjffce8152011-06-24 10:09:41 +00006506 if (!thr->llexit_done) {
sewardj2d2ea2f2009-08-02 10:15:07 +00006507 Filter__clear(thr->filter, "libhb_so_send");
sewardj8ab2c132009-08-02 09:34:35 +00006508 note_local_Kw_n_stack_for(thr);
sewardj2d2ea2f2009-08-02 10:15:07 +00006509 }
sewardjf98e1c02008-10-25 16:22:41 +00006510 VtsID__rcinc(thr->viR);
6511 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006512
sewardjf98e1c02008-10-25 16:22:41 +00006513 if (strong_send)
6514 show_thread_state("s-send", thr);
6515 else
6516 show_thread_state("w-send", thr);
6517}
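
/* A worked example of the strong/weak distinction above (not from the
   original source).  Suppose the sender is T1 with viW = {T1:3, T2:1},
   and the SO already holds viW = {T2:5}:

      strong send:  so->viW := {T1:3, T2:1}    (overwrite)
      weak send:    so->viW := {T1:3, T2:5}    (join2: pointwise max)

   In both cases T1's clocks then tick to {T1:4, T2:1}, ensuring that
   accesses T1 makes after the send are not covered by the clock now
   stored in the SO. */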
6518
6519void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6520{
6521 tl_assert(so);
6522 tl_assert(so->magic == SO_MAGIC);
6523
6524 if (so->viR != VtsID_INVALID) {
6525 tl_assert(so->viW != VtsID_INVALID);
6526
6527 /* Weak receive (basically, an R-acquisition of a R-W lock).
6528 This advances the read-clock of the receiver, but not the
6529 write-clock. */
6530 VtsID__rcdec(thr->viR);
6531 thr->viR = VtsID__join2( thr->viR, so->viR );
6532 VtsID__rcinc(thr->viR);
6533
sewardj90eb22e2009-07-28 20:22:18 +00006534 /* At one point (r10589) it seemed safest to tick the clocks for
6535 the receiving thread after the join. But on reflection, I
6536 wonder if that might cause it to 'overtake' constraints,
6537 which could lead to missing races. So, back out that part of
6538 r10589. */
6539 //VtsID__rcdec(thr->viR);
6540 //thr->viR = VtsID__tick( thr->viR, thr );
6541 //VtsID__rcinc(thr->viR);
sewardj23f12002009-07-24 08:45:08 +00006542
sewardjf98e1c02008-10-25 16:22:41 +00006543 /* For a strong receive, we also advance the receiver's write
6544 clock, which means the receive as a whole is essentially
6545 equivalent to a W-acquisition of a R-W lock. */
6546 if (strong_recv) {
6547 VtsID__rcdec(thr->viW);
6548 thr->viW = VtsID__join2( thr->viW, so->viW );
6549 VtsID__rcinc(thr->viW);
sewardj23f12002009-07-24 08:45:08 +00006550
sewardj90eb22e2009-07-28 20:22:18 +00006551 /* See comment just above, re r10589. */
6552 //VtsID__rcdec(thr->viW);
6553 //thr->viW = VtsID__tick( thr->viW, thr );
6554 //VtsID__rcinc(thr->viW);
sewardjf98e1c02008-10-25 16:22:41 +00006555 }
6556
sewardjf4845dc2010-05-28 20:09:59 +00006557 if (thr->filter)
6558 Filter__clear(thr->filter, "libhb_so_recv");
sewardj8ab2c132009-08-02 09:34:35 +00006559 note_local_Kw_n_stack_for(thr);
sewardj23f12002009-07-24 08:45:08 +00006560
sewardjf98e1c02008-10-25 16:22:41 +00006561 if (strong_recv)
6562 show_thread_state("s-recv", thr);
6563 else
6564 show_thread_state("w-recv", thr);
6565
6566 } else {
6567 tl_assert(so->viW == VtsID_INVALID);
6568 /* Deal with degenerate case: 'so' has no vts, so there has been
6569 no message posted to it. Just ignore this case. */
6570 show_thread_state("d-recv", thr);
6571 }
6572}
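
/* A worked example of the receive cases above (not from the original
   source).  Suppose so->viR = so->viW = {T1:4, T2:5} and the receiver
   is T3 with viR = viW = {T3:7}:

      weak receive:    T3.viR := {T1:4, T2:5, T3:7};  T3.viW unchanged
      strong receive:  T3.viW := {T1:4, T2:5, T3:7} as well

   That is, a weak receive (R-acquisition of an R-W lock) orders only
   T3's subsequent reads after the send, whereas a strong receive
   (W-acquisition) orders both its reads and its writes. */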
6573
6574Bool libhb_so_everSent ( SO* so )
6575{
6576 if (so->viR == VtsID_INVALID) {
6577 tl_assert(so->viW == VtsID_INVALID);
6578 return False;
6579 } else {
6580 tl_assert(so->viW != VtsID_INVALID);
6581 return True;
6582 }
6583}
6584
6585#define XXX1 0 // 0x67a106c
6586#define XXX2 0
6587
sewardj23f12002009-07-24 08:45:08 +00006588static inline Bool TRACEME(Addr a, SizeT szB) {
sewardjf98e1c02008-10-25 16:22:41 +00006589 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6590 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6591 return False;
6592}
6593static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
sewardj23f12002009-07-24 08:45:08 +00006594 SVal sv = zsm_sread08(a);
sewardjf98e1c02008-10-25 16:22:41 +00006595 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6596 show_thread_state("", thr);
6597 VG_(printf)("%s","\n");
6598}
6599
sewardj23f12002009-07-24 08:45:08 +00006600void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006601{
6602 SVal sv = SVal__mkC(thr->viW, thr->viW);
6603 tl_assert(is_sane_SVal_C(sv));
sewardj23f12002009-07-24 08:45:08 +00006604 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6605 zsm_sset_range( a, szB, sv );
6606 Filter__clear_range( thr->filter, a, szB );
6607 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
sewardjf98e1c02008-10-25 16:22:41 +00006608}
6609
sewardjfd35d492011-03-17 19:39:55 +00006610void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
sewardjf98e1c02008-10-25 16:22:41 +00006611{
sewardj23f12002009-07-24 08:45:08 +00006612 /* do nothing */
sewardjf98e1c02008-10-25 16:22:41 +00006613}
6614
sewardjfd35d492011-03-17 19:39:55 +00006615void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6616{
6617 /* This really does put the requested range in NoAccess. It's
6618 expensive though. */
6619 SVal sv = SVal_NOACCESS;
6620 tl_assert(is_sane_SVal_C(sv));
6621 zsm_sset_range( a, szB, sv );
6622 Filter__clear_range( thr->filter, a, szB );
6623}
6624
sewardj406bac82010-03-03 23:03:40 +00006625void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6626{
6627 SVal sv = SVal_NOACCESS;
6628 tl_assert(is_sane_SVal_C(sv));
6629 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6630 zsm_sset_range( a, szB, sv );
6631 Filter__clear_range( thr->filter, a, szB );
6632 if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6633}
6634
sewardj0b20a152011-03-10 21:34:21 +00006635Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
sewardjf98e1c02008-10-25 16:22:41 +00006636 tl_assert(thr);
sewardj60626642011-03-10 15:14:37 +00006637 return thr->hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006638}
6639
sewardj0b20a152011-03-10 21:34:21 +00006640void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
sewardjf98e1c02008-10-25 16:22:41 +00006641 tl_assert(thr);
sewardj0b20a152011-03-10 21:34:21 +00006642 thr->hgthread = hgthread;
sewardjf98e1c02008-10-25 16:22:41 +00006643}
6644
sewardj23f12002009-07-24 08:45:08 +00006645void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
sewardjf98e1c02008-10-25 16:22:41 +00006646{
sewardj23f12002009-07-24 08:45:08 +00006647 zsm_scopy_range(src, dst, len);
6648 Filter__clear_range( thr->filter, dst, len );
sewardjf98e1c02008-10-25 16:22:41 +00006649}
6650
6651void libhb_maybe_GC ( void )
6652{
6653 event_map_maybe_GC();
6654 /* If there are still freelist entries available, no need for a
6655 GC. */
6656 if (vts_tab_freelist != VtsID_INVALID)
6657 return;
6658 /* So all the table entries are full, and we're having to expand
 6659 the table. But did we hit the threshold point yet? */
6660 if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6661 return;
6662 vts_tab__do_GC( False/*don't show stats*/ );
6663}
6664
6665
6666/////////////////////////////////////////////////////////////////
6667/////////////////////////////////////////////////////////////////
6668// //
6669// SECTION END main library //
6670// //
6671/////////////////////////////////////////////////////////////////
6672/////////////////////////////////////////////////////////////////
6673
6674/*--------------------------------------------------------------------*/
6675/*--- end libhb_main.c ---*/
6676/*--------------------------------------------------------------------*/