
/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking               ---*/
/*--- the happens-before relationship in concurrent programs.      ---*/
/*---                                                 libhb_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of LibHB, a library for implementing and checking
   the happens-before relationship in concurrent programs.

   Copyright (C) 2008-2009 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h"        // VG_(clo_verbosity)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"

#include "libhb.h"


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                      Debugging #defines                     //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Check the sanity of shadow values in the core memory state
   machine.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_MSM 1
#else
#  define CHECK_MSM 0
#endif


/* Check sanity (reference counts, etc) in the conflicting access
   machinery.  Change #if 0 to #if 1 to enable this. */
#if 0
#  define CHECK_CEM 1
#else
#  define CHECK_CEM 0
#endif


/* Check sanity in the compressed shadow memory machinery,
   particularly in its caching innards.  Unfortunately there's no
   almost-zero-cost way to make them selectable at run time.  Hence
   set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
#  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
#  define inline __attribute__((noinline))
   /* probably want to ditch -fomit-frame-pointer too */
#else
#  define CHECK_ZSM 0  /* don't sanity-check CacheLine stuff */
#endif


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                     Forward declarations                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/* Globals needed by other parts of the library.  These are set
   once at startup and then never changed. */
static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//            SECTION BEGIN compressed shadow memory           //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_ZSM_H
#define __HB_ZSM_H

typedef  ULong  SVal;

/* This value has special significance to the implementation, and callers
   may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)

/* This is the default value for shadow memory.  Initially the shadow
   memory contains no accessible areas and so all reads produce this
   value.  TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)

/* Initialise the library.  Once initialised, it will (or may) call
   rcinc and rcdec in response to all the calls below, in order to
   allow the user to do reference counting on the SVals stored herein.
   It is important to understand, however, that due to internal
   caching, the reference counts are in general inaccurate, and can be
   both above or below the true reference count for an item.  In
   particular, the library may indicate that the reference count for
   an item is zero, when in fact it is not.

   To make the reference counting exact and therefore useful, call
   zsm_flush_cache.  Immediately after it returns, the reference
   counts for all items, as deduced by the caller by observing calls
   to rcinc and rcdec, will be correct, and so any items with a zero
   reference count may be freed (or at least considered to be
   unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );

static void zsm_sset_range  ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );

#endif /* ! __HB_ZSM_H */


/* Round a up to the next multiple of N.  N must be a power of 2 */
#define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
/* Round a down to the next multiple of N.  N must be a power of 2 */
#define ROUNDDN(a, N)   ((a) & ~((N)-1))
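/* Illustrative example (added commentary, not part of the original
   code): with N == 8, ROUNDUP(13, 8) == 16 and ROUNDDN(13, 8) == 8;
   already-aligned values map to themselves under both macros. */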


/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;


/* ------ CacheLine ------ */

#define N_LINE_BITS      6 /* must be >= 3 */
#define N_LINE_ARANGE    (1 << N_LINE_BITS)
#define N_LINE_TREES     (N_LINE_ARANGE >> 3)

typedef
   struct {
      UShort descrs[N_LINE_TREES];
      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
   }
   CacheLine;

#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64   (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0  (1<<7)
#define TREE_DESCR_8_1  (1<<8)
#define TREE_DESCR_8_2  (1<<9)
#define TREE_DESCR_8_3  (1<<10)
#define TREE_DESCR_8_4  (1<<11)
#define TREE_DESCR_8_5  (1<<12)
#define TREE_DESCR_8_6  (1<<13)
#define TREE_DESCR_8_7  (1<<14)
#define TREE_DESCR_DTY  (1<<15)
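/* Illustrative examples (added commentary, not used by the code): a
   tree holding one aligned 64-bit value has descr == TREE_DESCR_64; a
   tree of eight individual bytes has all of TREE_DESCR_8_7 ..
   TREE_DESCR_8_0 set; a 32-bit write to the lower half of a 64-bit
   node splits TREE_DESCR_64 into TREE_DESCR_32_1 | TREE_DESCR_32_0
   (see pulldown_to_32 below). */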

typedef
   struct {
      SVal  dict[4]; /* can represent up to 4 diff values in the line */
      UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
                                      dict indexes */
      /* if dict[0] == SVal_INVALID then dict[1] is the index of the
         LineF to use, and dict[2..] are also SVal_INVALID. */
   }
   LineZ; /* compressed rep for a cache line */

typedef
   struct {
      Bool inUse;
      SVal w64s[N_LINE_ARANGE];
   }
   LineF; /* full rep for a cache line */
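
/* Size sketch (illustrative, assuming N_LINE_ARANGE == 64 and 8-byte
   SVals): a LineZ needs 4*8 + 64/4 == 48 bytes, versus 64*8 == 512
   bytes of svals in the full rep, hence "compressed".  The Z rep
   suffices whenever a line holds at most 4 distinct SVals, which is
   much the common case; otherwise the line is parked in a LineF. */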

/* Shadow memory.
   Primary map is a WordFM Addr SecMap*.
   SecMaps cover some page-size-ish section of address space and hold
   a compressed representation.
   CacheLine-sized chunks of SecMaps are copied into a Cache, being
   decompressed when moved into the cache and recompressed on the
   way out.  Because of this, the cache must operate as a writeback
   cache, not a writethrough one.

   Each SecMap must hold a power-of-2 number of CacheLines.  Hence
   N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS   13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)

// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)

/* The data in the SecMap is held in the array of LineZs.  Each LineZ
   either carries the required data directly, in a compressed
   representation, or it holds (in .dict[0]) an index to the LineF in
   .linesF that holds the full representation.

   Currently-unused LineF's have their .inUse bit set to zero.
   Since each in-use LineF is referred to by exactly one LineZ,
   the number of .linesZ[] that refer to .linesF should equal
   the number of .linesF[] that have .inUse == True.

   RC obligations: the RCs presented to the user include exactly
   the values in:
     * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
     * F reps that are in use (.inUse == True)

   Hence the following actions at the following transitions are required:

   F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
   F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
   Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
   Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
*/
typedef
   struct {
      UInt   magic;
      LineZ  linesZ[N_SECMAP_ZLINES];
      LineF* linesF;
      UInt   linesF_size;
   }
   SecMap;

#define SecMap_MAGIC   0x571e58cbU

static inline Bool is_sane_SecMap ( SecMap* sm ) {
   return sm != NULL && sm->magic == SecMap_MAGIC;
}

/* ------ Cache ------ */

#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)

/* Each tag is the address of the associated CacheLine, rounded down
   to a CacheLine address boundary.  A CacheLine size must be a power
   of 2 and must be 8 or more.  Hence an easy way to initialise the
   cache so it is empty is to set all the tag values to any value % 8
   != 0, eg 1.  This means all queries in the cache initially miss.
   It does however require us to detect, and not write back, any line
   with a bogus tag. */
typedef
   struct {
      CacheLine lyns0[N_WAY_NENT];
      Addr      tags0[N_WAY_NENT];
   }
   Cache;

static inline Bool is_valid_scache_tag ( Addr tag ) {
   /* a valid tag should be naturally aligned to the start of
      a CacheLine. */
   return 0 == (tag & (N_LINE_ARANGE - 1));
}
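
/* Scale note (illustrative): with N_WAY_BITS == 16 and
   N_LINE_ARANGE == 64, the cache holds 2^16 lines covering 4MB of
   guest address space at any one time.  Despite the N_WAY_ naming it
   is direct-mapped: each address can live in exactly one slot. */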


/* --------- Primary data structures --------- */

/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache   cache_shmem;


static UWord stats__secmaps_search       = 0; // # SM finds
static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes  = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
static UWord stats__cache_F_fetches      = 0; // # F lines fetched
static UWord stats__cache_F_wbacks       = 0; // # F lines written back
static UWord stats__cache_invals         = 0; // # cache invals
static UWord stats__cache_flushes        = 0; // # cache flushes
static UWord stats__cache_totrefs        = 0; // # total accesses
static UWord stats__cache_totmisses      = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s       = 0; // # 8-bit shadow reads
static UWord stats__cline_swrite08s      = 0; // # 8-bit shadow writes
static UWord stats__cline_swrite16s      = 0; // # 16-bit shadow writes
static UWord stats__cline_swrite32s      = 0; // # 32-bit shadow writes
static UWord stats__cline_swrite64s      = 0; // # 64-bit shadow writes
static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8

static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
   return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
   return a & (N_SECMAP_ARANGE - 1);
}


/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap                          ---*/
/*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
/*----------------------------------------------------------------*/

/*--------------- SecMap allocation --------------- */

static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;

static void* shmem__bigchunk_alloc ( SizeT n )
{
   const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
   tl_assert(n > 0);
   n = VG_ROUNDUP(n, 16);
   tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
   tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
             <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
   if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
      if (0)
         VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
                     (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
      shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
      if (shmem__bigchunk_next == NULL)
         VG_(out_of_memory_NORETURN)(
            "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
      shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
   }
   tl_assert(shmem__bigchunk_next);
   tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
   tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
   shmem__bigchunk_next += n;
   return shmem__bigchunk_next - n;
}

static SecMap* shmem__alloc_SecMap ( void )
{
   Word    i, j;
   SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
   if (0) VG_(printf)("alloc_SecMap %p\n",sm);
   tl_assert(sm);
   sm->magic = SecMap_MAGIC;
   for (i = 0; i < N_SECMAP_ZLINES; i++) {
      sm->linesZ[i].dict[0] = SVal_NOACCESS;
      sm->linesZ[i].dict[1] = SVal_INVALID;
      sm->linesZ[i].dict[2] = SVal_INVALID;
      sm->linesZ[i].dict[3] = SVal_INVALID;
      for (j = 0; j < N_LINE_ARANGE/4; j++)
         sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
   }
   sm->linesF      = NULL;
   sm->linesF_size = 0;
   stats__secmaps_allocd++;
   stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
   stats__secmap_linesZ_allocd    += N_SECMAP_ZLINES;
   stats__secmap_linesZ_bytes     += N_SECMAP_ZLINES * sizeof(LineZ);
   return sm;
}

typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
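/* (Added commentary) smCache is a 3-entry move-to-front cache in
   front of the WordFM lookup below: a hit on entry 0 returns
   immediately, while hits on entries 1 or 2 swap the entry one slot
   towards the front, so recently used SecMaps migrate to the cheapest
   probe.  Initialising gaKey to 1 works because valid keys are
   N_SECMAP_ARANGE-aligned, so an empty slot can never match. */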

static SecMap* shmem__find_SecMap ( Addr ga )
{
   SecMap* sm    = NULL;
   Addr    gaKey = shmem__round_to_SecMap_base(ga);
   // Cache
   stats__secmaps_search++;
   if (LIKELY(gaKey == smCache[0].gaKey))
      return smCache[0].sm;
   if (LIKELY(gaKey == smCache[1].gaKey)) {
      SMCacheEnt tmp = smCache[0];
      smCache[0] = smCache[1];
      smCache[1] = tmp;
      return smCache[0].sm;
   }
   if (gaKey == smCache[2].gaKey) {
      SMCacheEnt tmp = smCache[1];
      smCache[1] = smCache[2];
      smCache[2] = tmp;
      return smCache[1].sm;
   }
   // end Cache
   stats__secmaps_search_slow++;
   if (VG_(lookupFM)( map_shmem,
                      NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
      tl_assert(sm != NULL);
      smCache[2] = smCache[1];
      smCache[1] = smCache[0];
      smCache[0].gaKey = gaKey;
      smCache[0].sm    = sm;
   } else {
      tl_assert(sm == NULL);
   }
   return sm;
}

static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
   SecMap* sm = shmem__find_SecMap ( ga );
   if (LIKELY(sm)) {
      return sm;
   } else {
      /* create a new one */
      Addr gaKey = shmem__round_to_SecMap_base(ga);
      sm = shmem__alloc_SecMap();
      tl_assert(sm);
      VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
      return sm;
   }
}


/* ------------ LineF and LineZ related ------------ */

static void rcinc_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcinc(lineF->w64s[i]);
}

static void rcdec_LineF ( LineF* lineF ) {
   UWord i;
   tl_assert(lineF->inUse);
   for (i = 0; i < N_LINE_ARANGE; i++)
      rcdec(lineF->w64s[i]);
}

static void rcinc_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcinc(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}

static void rcdec_LineZ ( LineZ* lineZ ) {
   tl_assert(lineZ->dict[0] != SVal_INVALID);
   rcdec(lineZ->dict[0]);
   if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
   if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
   if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}

inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
   Word bix, shft, mask, prep;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   mask = 3 << shft;
   prep = b2 << shft;
   arr[bix] = (arr[bix] & ~mask) | prep;
}

inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
   Word bix, shft;
   tl_assert(ix >= 0);
   bix  = ix >> 2;
   shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
   return (arr[bix] >> shft) & 3;
}
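
/* Worked example (illustrative): for ix == 5 we get bix == 1 and
   shft == 2, so the dict index for byte 5 of the line lives in bits
   3:2 of arr[1].  Writing b2 == 3 there turns arr[1] == 0x00 into
   0x0C, and read_twobit_array(arr, 5) then returns 3.  Four
   byte-positions pack into each UChar, hence the N_LINE_ARANGE/4
   sizing of LineZ.ix2s. */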

/* Given address 'tag', find either the Z or F line containing relevant
   data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
                                  /*OUT*/LineF** fp, Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      lineZ = NULL;
   }
   *zp = lineZ;
   *fp = lineF;
}

/* Given address 'tag', return the relevant SecMap and the index of
   the LineZ within it, in the expectation that the line is to be
   overwritten.  Regardless of whether 'tag' is currently associated
   with a Z or F representation, rcdec the current representation,
   in recognition of the fact that the contents are just about to be
   overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
                          /*OUT*/Word* zixp,
                          Addr tag ) {
   LineZ* lineZ;
   LineF* lineF;
   UWord   zix;
   SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
   UWord   smoff = shmem__get_SecMap_offset(tag);
   /* since smoff is derived from a valid tag, it should be
      cacheline-aligned. */
   tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
   zix = smoff >> N_LINE_BITS;
   tl_assert(zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];
   lineF = NULL;
   /* re RCs, we are freeing up this LineZ/LineF so that new data can
      be parked in it.  Hence have to rcdec it accordingly. */
   /* If lineZ has an associated lineF, free it up. */
   if (lineZ->dict[0] == SVal_INVALID) {
      UInt fix = (UInt)lineZ->dict[1];
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(lineF->inUse);
      rcdec_LineF(lineF);
      lineF->inUse = False;
   } else {
      rcdec_LineZ(lineZ);
   }
   *smp  = sm;
   *zixp = zix;
}

static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
   UInt   i, new_size;
   LineF* nyu;

   if (sm->linesF) {
      tl_assert(sm->linesF_size > 0);
   } else {
      tl_assert(sm->linesF_size == 0);
   }

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         if (!sm->linesF[i].inUse) {
            *fixp = (Word)i;
            return;
         }
      }
   }

   /* No free F line found.  Expand existing array and try again. */
   new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
   nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
                           new_size * sizeof(LineF) );
   tl_assert(nyu);

   stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
   stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
                                  * sizeof(LineF);

   if (0)
      VG_(printf)("SM %p: expand F array from %d to %d\n",
                  sm, (Int)sm->linesF_size, new_size);

   for (i = 0; i < new_size; i++)
      nyu[i].inUse = False;

   if (sm->linesF) {
      for (i = 0; i < sm->linesF_size; i++) {
         tl_assert(sm->linesF[i].inUse);
         nyu[i] = sm->linesF[i];
      }
      VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
      HG_(free)(sm->linesF);
   }

   sm->linesF      = nyu;
   sm->linesF_size = new_size;

   for (i = 0; i < sm->linesF_size; i++) {
      if (!sm->linesF[i].inUse) {
         *fixp = (Word)i;
         return;
      }
   }

   /*NOTREACHED*/
   tl_assert(0);
}


/* ------------ CacheLine and implicit-tree related ------------ */

__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
   Word i;
   if (!cl) {
      VG_(printf)("%s","pp_CacheLine(NULL)\n");
      return;
   }
   for (i = 0; i < N_LINE_TREES; i++)
      VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
   for (i = 0; i < N_LINE_ARANGE; i++)
      VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
}

static UChar descr_to_validbits ( UShort descr )
{
   /* a.k.a. Party Time for gcc's constant folder */
#  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
                b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
             ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
                          ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
                          ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
                          ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
                          ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
                          ( (b16_2) << 4)  | ( (b64)   << 3)  | \
                          ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
                          ( (b16_0) << 0) ) )

#  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
             ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
                         ( (bit5) << 5) | ( (bit4) << 4) | \
                         ( (bit3) << 3) | ( (bit2) << 2) | \
                         ( (bit1) << 1) | ( (bit0) << 0) ) )

   /* these should all get folded out at compile time */
   tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
   tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
   tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);

   switch (descr) {
   /*
              +--------------------------------- TREE_DESCR_8_7
              |             +------------------- TREE_DESCR_8_0
              |             |  +---------------- TREE_DESCR_16_3
              |             |  | +-------------- TREE_DESCR_32_1
              |             |  | | +------------ TREE_DESCR_16_2
              |             |  | | |  +--------- TREE_DESCR_64
              |             |  | | |  |  +------ TREE_DESCR_16_1
              |             |  | | |  |  | +---- TREE_DESCR_32_0
              |             |  | | |  |  | | +-- TREE_DESCR_16_0
              |             |  | | |  |  | | |
              |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
      case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
         return BYTE(1,1,1,1,1,1,1,1);
      case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
         return BYTE(1,1,0,1,1,1,1,1);
      case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
         return BYTE(0,1,1,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
         return BYTE(0,1,0,1,1,1,1,1);

      case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
         return BYTE(1,1,1,1,1,1,0,1);
      case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
         return BYTE(1,1,0,1,1,1,0,1);
      case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
         return BYTE(0,1,1,1,1,1,0,1);
      case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
         return BYTE(0,1,0,1,1,1,0,1);

      case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
         return BYTE(1,1,1,1,0,1,1,1);
      case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
         return BYTE(1,1,0,1,0,1,1,1);
      case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
         return BYTE(0,1,1,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
         return BYTE(0,1,0,1,0,1,1,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
         return BYTE(1,1,1,1,0,1,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
         return BYTE(1,1,0,1,0,1,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
         return BYTE(0,1,1,1,0,1,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
         return BYTE(0,1,0,1,0,1,0,1);

      case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
         return BYTE(0,0,0,1,1,1,1,1);
      case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
         return BYTE(0,0,0,1,1,1,0,1);
      case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
         return BYTE(0,0,0,1,0,1,1,1);
      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
         return BYTE(0,0,0,1,0,1,0,1);

      case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
         return BYTE(1,1,1,1,0,0,0,1);
      case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
         return BYTE(1,1,0,1,0,0,0,1);
      case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
         return BYTE(0,1,1,1,0,0,0,1);
      case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
         return BYTE(0,1,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
         return BYTE(0,0,0,1,0,0,0,1);

      case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
         return BYTE(0,0,0,0,0,0,0,1);

      default: return BYTE(0,0,0,0,0,0,0,0);
         /* INVALID - any valid descr produces at least one
            valid bit in tree[0..7] */
   }
   /*NOTREACHED*/
   tl_assert(0);

#  undef DESCR
#  undef BYTE
}

__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
   return descr_to_validbits(descr) != 0;
}

static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   VG_(sprintf)(dst,
                "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
                (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
                (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
                (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
                (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   );
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
                (Int)((byte & 128) ? 1 : 0),
                (Int)((byte &  64) ? 1 : 0),
                (Int)((byte &  32) ? 1 : 0),
                (Int)((byte &  16) ? 1 : 0),
                (Int)((byte &   8) ? 1 : 0),
                (Int)((byte &   4) ? 1 : 0),
                (Int)((byte &   2) ? 1 : 0),
                (Int)((byte &   1) ? 1 : 0)
   );
}

static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   Word  i;
   UChar validbits = descr_to_validbits(descr);
   HChar buf[128], buf2[128];
   if (validbits == 0)
      goto bad;
   for (i = 0; i < 8; i++) {
      if (validbits & (1<<i)) {
         if (tree[i] == SVal_INVALID)
            goto bad;
      } else {
         if (tree[i] != SVal_INVALID)
            goto bad;
      }
   }
   return True;
  bad:
   sprintf_Descr( buf, descr );
   sprintf_Byte( buf2, validbits );
   VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   for (i = 0; i < 8; i++)
      VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   VG_(printf)("%s","}\n");
   return False;
}

static Bool is_sane_CacheLine ( CacheLine* cl )
{
   Word tno, cloff;

   if (!cl) goto bad;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = cl->descrs[tno];
      SVal*  tree  = &cl->svals[cloff];
      if (!is_sane_Descr_and_Tree(descr, tree))
         goto bad;
   }
   tl_assert(cloff == N_LINE_ARANGE);
   return True;
  bad:
   pp_CacheLine(cl);
   return False;
}

static UShort normalise_tree ( /*MOD*/SVal* tree )
{
   UShort descr;
   /* pre: incoming tree[0..7] does not have any invalid shvals, in
      particular no zeroes. */
   if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
                || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
                || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
                || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
      tl_assert(0);

   descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
           | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
           | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   /* build 16-bit layer */
   if (tree[1] == tree[0]) {
      tree[1] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
      descr |= TREE_DESCR_16_0;
   }
   if (tree[3] == tree[2]) {
      tree[3] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
      descr |= TREE_DESCR_16_1;
   }
   if (tree[5] == tree[4]) {
      tree[5] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
      descr |= TREE_DESCR_16_2;
   }
   if (tree[7] == tree[6]) {
      tree[7] = SVal_INVALID;
      descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
      descr |= TREE_DESCR_16_3;
   }
   /* build 32-bit layer */
   if (tree[2] == tree[0]
       && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
      tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
      descr |= TREE_DESCR_32_0;
   }
   if (tree[6] == tree[4]
       && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
      tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
      descr |= TREE_DESCR_32_1;
   }
   /* build 64-bit layer */
   if (tree[4] == tree[0]
       && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
      tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
      descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
      descr |= TREE_DESCR_64;
   }
   return descr;
}
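
/* Worked example (illustrative): if all eight leaves hold the same
   SVal v, the 16-bit pass merges pairs (descr becomes
   16_3|16_2|16_1|16_0), the 32-bit pass merges again (32_1|32_0), and
   the 64-bit pass leaves just TREE_DESCR_64, with v in tree[0] and
   tree[1..7] set to SVal_INVALID.  This collapsing is what lets the
   2-bit-dictionary Z rep cover lines with few distinct values. */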

/* This takes a cacheline where all the data is at the leaves
   (w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
   Word tno, cloff;
   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      SVal* tree = &cl->svals[cloff];
      cl->descrs[tno] = normalise_tree( tree );
   }
   tl_assert(cloff == N_LINE_ARANGE);
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   stats__cline_normalises++;
}


typedef struct { UChar count; SVal sval; } CountedSVal;

static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
                               /*OUT*/Word* dstUsedP,
                               Word nDst, CacheLine* src )
{
   Word  tno, cloff, dstUsed;

   tl_assert(nDst == N_LINE_ARANGE);
   dstUsed = 0;

   for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
      UShort descr = src->descrs[tno];
      SVal*  tree  = &src->svals[cloff];

      /* sequentialise the tree described by (descr,tree). */
#     define PUT(_n,_v)                                \
         do { dst[dstUsed  ].count = (_n);             \
              dst[dstUsed++].sval  = (_v);             \
         } while (0)

      /* byte 0 */
      if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
      if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
      if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
      if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
      /* byte 1 */
      if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
      /* byte 2 */
      if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
      if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
      /* byte 3 */
      if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
      /* byte 4 */
      if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
      if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
      if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
      /* byte 5 */
      if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
      /* byte 6 */
      if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
      if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
      /* byte 7 */
      if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);

#     undef PUT
      /* END sequentialise the tree described by (descr,tree). */

   }
   tl_assert(cloff == N_LINE_ARANGE);
   tl_assert(dstUsed <= nDst);

   *dstUsedP = dstUsed;
}

/* Write the cacheline 'wix' to backing store.  Where it ends up
   is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
   Word        i, j, k, m;
   Addr        tag;
   SecMap*     sm;
   CacheLine*  cl;
   LineZ*      lineZ;
   LineF*      lineF;
   Word        zix, fix, csvalsUsed;
   CountedSVal csvals[N_LINE_ARANGE];
   SVal        sv;

   if (0)
      VG_(printf)("scache wback line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* The cache line may have been invalidated; if so, ignore it. */
   if (!is_valid_scache_tag(tag))
      return;

   /* Where are we going to put it? */
   sm    = NULL;
   lineZ = NULL;
   lineF = NULL;
   zix = fix = -1;

   /* find the Z line to write in and rcdec it or the associated F
      line. */
   find_Z_for_writing( &sm, &zix, tag );

   tl_assert(sm);
   tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   lineZ = &sm->linesZ[zix];

   /* Generate the data to be stored */
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */

   csvalsUsed = -1;
   sequentialise_CacheLine( csvals, &csvalsUsed,
                            N_LINE_ARANGE, cl );
   tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   if (0) VG_(printf)("%lu ", csvalsUsed);

   lineZ->dict[0] = lineZ->dict[1]
                  = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;

   /* i indexes actual shadow values, k is cursor in csvals */
   i = 0;
   for (k = 0; k < csvalsUsed; k++) {

      sv = csvals[k].sval;
      if (CHECK_ZSM)
         tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
      /* do we already have it? */
      if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
      if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
      if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
      if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
      /* no.  look for a free slot. */
      if (CHECK_ZSM)
         tl_assert(sv != SVal_INVALID);
      if (lineZ->dict[0]
          == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
      if (lineZ->dict[1]
          == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
      if (lineZ->dict[2]
          == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
      if (lineZ->dict[3]
          == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
      break; /* we'll have to use the f rep */
     dict_ok:
      m = csvals[k].count;
      if (m == 8) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         write_twobit_array( lineZ->ix2s, i+4, j );
         write_twobit_array( lineZ->ix2s, i+5, j );
         write_twobit_array( lineZ->ix2s, i+6, j );
         write_twobit_array( lineZ->ix2s, i+7, j );
         i += 8;
      }
      else if (m == 4) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         write_twobit_array( lineZ->ix2s, i+2, j );
         write_twobit_array( lineZ->ix2s, i+3, j );
         i += 4;
      }
      else if (m == 1) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         i += 1;
      }
      else if (m == 2) {
         write_twobit_array( lineZ->ix2s, i+0, j );
         write_twobit_array( lineZ->ix2s, i+1, j );
         i += 2;
      }
      else {
         tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
      }

   }

   if (LIKELY(i == N_LINE_ARANGE)) {
      /* Construction of the compressed representation was
         successful. */
      rcinc_LineZ(lineZ);
      stats__cache_Z_wbacks++;
   } else {
      /* Cannot use the compressed(z) representation.  Use the full(f)
         rep instead. */
      tl_assert(i >= 0 && i < N_LINE_ARANGE);
      alloc_F_for_writing( sm, &fix );
      tl_assert(sm->linesF);
      tl_assert(sm->linesF_size > 0);
      tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
      lineF = &sm->linesF[fix];
      tl_assert(!lineF->inUse);
      lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
      lineZ->dict[1] = (SVal)fix;
      lineF->inUse = True;
      i = 0;
      for (k = 0; k < csvalsUsed; k++) {
         if (CHECK_ZSM)
            tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
         sv = csvals[k].sval;
         if (CHECK_ZSM)
            tl_assert(sv != SVal_INVALID);
         for (m = csvals[k].count; m > 0; m--) {
            lineF->w64s[i] = sv;
            i++;
         }
      }
      tl_assert(i == N_LINE_ARANGE);
      rcinc_LineF(lineF);
      stats__cache_F_wbacks++;
   }
}

/* Fetch the cacheline 'wix' from the backing store.  The tag
   associated with 'wix' is assumed to have already been filled in;
   hence that is used to determine where in the backing store to read
   from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
   Word       i;
   Addr       tag;
   CacheLine* cl;
   LineZ*     lineZ;
   LineF*     lineF;

   if (0)
      VG_(printf)("scache fetch line %d\n", (Int)wix);

   tl_assert(wix >= 0 && wix < N_WAY_NENT);

   tag = cache_shmem.tags0[wix];
   cl  = &cache_shmem.lyns0[wix];

   /* reject nonsense requests */
   tl_assert(is_valid_scache_tag(tag));

   lineZ = NULL;
   lineF = NULL;
   find_ZF_for_reading( &lineZ, &lineF, tag );
   tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );

   /* expand the data into the bottom layer of the tree, then get
      cacheline_normalise to build the descriptor array. */
   if (lineF) {
      tl_assert(lineF->inUse);
      for (i = 0; i < N_LINE_ARANGE; i++) {
         cl->svals[i] = lineF->w64s[i];
      }
      stats__cache_F_fetches++;
   } else {
      for (i = 0; i < N_LINE_ARANGE; i++) {
         SVal sv;
         UWord ix = read_twobit_array( lineZ->ix2s, i );
         /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
         sv = lineZ->dict[ix];
         tl_assert(sv != SVal_INVALID);
         cl->svals[i] = sv;
      }
      stats__cache_Z_fetches++;
   }
   normalise_CacheLine( cl );
}

static void shmem__invalidate_scache ( void ) {
   Word wix;
   if (0) VG_(printf)("%s","scache inval\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_invals++;
}

static void shmem__flush_and_invalidate_scache ( void ) {
   Word wix;
   Addr tag;
   if (0) VG_(printf)("%s","scache flush and invalidate\n");
   tl_assert(!is_valid_scache_tag(1));
   for (wix = 0; wix < N_WAY_NENT; wix++) {
      tag = cache_shmem.tags0[wix];
      if (tag == 1/*INVALID*/) {
         /* already invalid; nothing to do */
      } else {
         tl_assert(is_valid_scache_tag(tag));
         cacheline_wback( wix );
      }
      cache_shmem.tags0[wix] = 1/*INVALID*/;
   }
   stats__cache_flushes++;
   stats__cache_invals++;
}


static inline Bool aligned16 ( Addr a ) {
   return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
   return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
   return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
   return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
   return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
   return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
   return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
   return a & 7;
}
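
/* Address decomposition example (illustrative, for N_LINE_BITS == 6
   and N_WAY_BITS == 16): for a == 0x4037A5 the cache tag is
   a & ~63 == 0x403780, the direct-mapped slot is
   (a >> 6) & 0xFFFF == 0xDE, the tree within the line is
   (a & 63) >> 3 == 4, and the byte within that tree is a & 7 == 5. */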

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */
   Addr  tag = a & ~(N_LINE_ARANGE - 1);
   UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   stats__cache_totrefs++;
   if (LIKELY(tag == cache_shmem.tags0[wix])) {
      return &cache_shmem.lyns0[wix];
   } else {
      return get_cacheline_MISS( a );
   }
}

static __attribute__((noinline))
   CacheLine* get_cacheline_MISS ( Addr a )
{
   /* tag is 'a' with the in-line offset masked out,
      eg a[31]..a[4] 0000 */

   CacheLine* cl;
   Addr*      tag_old_p;
   Addr       tag = a & ~(N_LINE_ARANGE - 1);
   UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);

   tl_assert(tag != cache_shmem.tags0[wix]);

   /* Dump the old line into the backing store. */
   stats__cache_totmisses++;

   cl        = &cache_shmem.lyns0[wix];
   tag_old_p = &cache_shmem.tags0[wix];

   if (is_valid_scache_tag( *tag_old_p )) {
      /* EXPENSIVE and REDUNDANT: callee does it */
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      cacheline_wback( wix );
   }
   /* and reload the new one */
   *tag_old_p = tag;
   cacheline_fetch( wix );
   if (CHECK_ZSM)
      tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   return cl;
}

static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_64to32pulldown++;
   switch (toff) {
      case 0: case 4:
         tl_assert(descr & TREE_DESCR_64);
         tree[4] = tree[0];
         descr &= ~TREE_DESCR_64;
         descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_32to16pulldown++;
   switch (toff) {
      case 0: case 2:
         if (!(descr & TREE_DESCR_32_0)) {
            descr = pulldown_to_32(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_32_0);
         tree[2] = tree[0];
         descr &= ~TREE_DESCR_32_0;
         descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
         break;
      case 4: case 6:
         if (!(descr & TREE_DESCR_32_1)) {
            descr = pulldown_to_32(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_32_1);
         tree[6] = tree[4];
         descr &= ~TREE_DESCR_32_1;
         descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   stats__cline_16to8pulldown++;
   switch (toff) {
      case 0: case 1:
         if (!(descr & TREE_DESCR_16_0)) {
            descr = pulldown_to_16(tree, 0, descr);
         }
         tl_assert(descr & TREE_DESCR_16_0);
         tree[1] = tree[0];
         descr &= ~TREE_DESCR_16_0;
         descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
         break;
      case 2: case 3:
         if (!(descr & TREE_DESCR_16_1)) {
            descr = pulldown_to_16(tree, 2, descr);
         }
         tl_assert(descr & TREE_DESCR_16_1);
         tree[3] = tree[2];
         descr &= ~TREE_DESCR_16_1;
         descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
         break;
      case 4: case 5:
         if (!(descr & TREE_DESCR_16_2)) {
            descr = pulldown_to_16(tree, 4, descr);
         }
         tl_assert(descr & TREE_DESCR_16_2);
         tree[5] = tree[4];
         descr &= ~TREE_DESCR_16_2;
         descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
         break;
      case 6: case 7:
         if (!(descr & TREE_DESCR_16_3)) {
            descr = pulldown_to_16(tree, 6, descr);
         }
         tl_assert(descr & TREE_DESCR_16_3);
         tree[7] = tree[6];
         descr &= ~TREE_DESCR_16_3;
         descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
         break;
      default:
         tl_assert(0);
   }
   return descr;
}


static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_0;
         break;
      case 2:
         mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_1;
         break;
      case 4:
         mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_2;
         break;
      case 6:
         mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_16_3;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   UShort mask;
   switch (toff) {
      case 0:
         if (!(descr & TREE_DESCR_16_0))
            descr = pullup_descr_to_16(descr, 0);
         if (!(descr & TREE_DESCR_16_1))
            descr = pullup_descr_to_16(descr, 2);
         mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_0;
         break;
      case 4:
         if (!(descr & TREE_DESCR_16_2))
            descr = pullup_descr_to_16(descr, 4);
         if (!(descr & TREE_DESCR_16_3))
            descr = pullup_descr_to_16(descr, 6);
         mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
         tl_assert( (descr & mask) == mask );
         descr &= ~mask;
         descr |= TREE_DESCR_32_1;
         break;
      default:
         tl_assert(0);
   }
   return descr;
}

static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0: case 4:
         return 0 != (descr & TREE_DESCR_64);
      default:
         tl_assert(0);
   }
}

static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   switch (toff) {
      case 0:
         return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
      case 2:
         return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
      case 4:
         return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
      case 6:
         return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
      default:
         tl_assert(0);
   }
}

/* ------------ Cache management ------------ */

static void zsm_flush_cache ( void )
{
   shmem__flush_and_invalidate_scache();
}


static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
   tl_assert( sizeof(UWord) == sizeof(Addr) );

   rcinc = p_rcinc;
   rcdec = p_rcdec;

   tl_assert(map_shmem == NULL);
   map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
                           HG_(free),
                           NULL/*unboxed UWord cmp*/);
   tl_assert(map_shmem != NULL);
   shmem__invalidate_scache();

   /* a SecMap must contain an integral number of CacheLines */
   tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   /* also ... a CacheLine holds an integral number of trees */
   tl_assert(0 == (N_LINE_ARANGE % 8));
}
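
/* Usage sketch (illustrative; my_rcinc/my_rcdec are hypothetical
   caller-supplied hook names, not part of this file):

      zsm_init( my_rcinc, my_rcdec );

   Per the contract documented above, the caller may only trust the
   observed rcinc/rcdec balance immediately after zsm_flush_cache(). */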

/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//             SECTION END compressed shadow memory            //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////



/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                SECTION BEGIN vts primitives                 //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

#ifndef __HB_VTS_H
#define __HB_VTS_H

/* VtsIDs can't exceed 30 bits, since they have to be packed into the
   lowest 30 bits of an SVal. */
typedef  UInt  VtsID;
#define VtsID_INVALID 0xFFFFFFFF

/* A VTS contains .ts, its vector clock, and also .id, a field to hold
   a backlink for the caller's convenience.  Since we have no idea
   what to set that to in the library, it always gets set to
   VtsID_INVALID. */
typedef
   struct {
      VtsID   id;
      XArray* ts; /* XArray* ScalarTS(abstract) */
   }
   VTS;


/* Create a new, empty VTS. */
static VTS* VTS__new ( void );

/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );

/* Create a new singleton VTS. */
static VTS* VTS__singleton ( Thr* thr, ULong tym );

/* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   not modified. */
static VTS* VTS__tick ( Thr* me, VTS* vts );

/* Return a new VTS constructed as the join (max) of the 2 args.
   Neither arg is modified. */
static VTS* VTS__join ( VTS* a, VTS* b );

/* Compute the partial ordering relation of the two args.  Although we
   could be completely general and return an enumeration value (EQ,
   LT, GT, UN), in fact we only need LEQ, and so we may as well
   hardwire that fact.

   Returns NULL iff LEQ(A,B), or non-NULL if not.  In the latter case,
   the returned Thr* indicates the discovered point for which they are
   not.  There may be more than one such point, but we only care about
   seeing one of them, not all of them.  This rather strange
   convention is used because sometimes we want to know the actual
   index at which they first differ. */
static Thr* VTS__cmpLEQ ( VTS* a, VTS* b );

/* Compute an arbitrary structural (total) ordering on the two args,
   based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );

/* Debugging only.  Display the given VTS in the buffer. */
static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );

/* Debugging only.  Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );

#endif /* ! __HB_VTS_H */
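
/* Worked example (illustrative): writing a VTS as [Thr:clock] pairs,
   if a == [T1:3, T2:1] and b == [T1:2, T2:5], then VTS__cmpLEQ(a,b)
   and VTS__cmpLEQ(b,a) are both non-NULL (a exceeds b at T1, b
   exceeds a at T2), i.e. the two are unordered, and
   VTS__join(a,b) == [T1:3, T2:5].  A Thr* absent from a VTS has an
   implicit scalar timestamp of zero. */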


/*--------------- to do with Vector Timestamps ---------------*/

/* Scalar Timestamp */
typedef
   struct {
      Thr*  thr;
      ULong tym;
   }
   ScalarTS;


static Bool is_sane_VTS ( VTS* vts )
{
   UWord     i, n;
   ScalarTS  *st1, *st2;
   if (!vts) return False;
   if (!vts->ts) return False;
   n = VG_(sizeXA)( vts->ts );
   if (n >= 2) {
      for (i = 0; i < n-1; i++) {
         st1 = VG_(indexXA)( vts->ts, i );
         st2 = VG_(indexXA)( vts->ts, i+1 );
         if (st1->thr >= st2->thr)
            return False;
         if (st1->tym == 0 || st2->tym == 0)
            return False;
      }
   }
   return True;
}


/* Create a new, empty VTS.
*/
VTS* VTS__new ( void )
{
   VTS* vts;
   vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
   tl_assert(vts);
   vts->id = VtsID_INVALID;
   vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
                         HG_(free), sizeof(ScalarTS) );
   tl_assert(vts->ts);
   return vts;
}


/* Delete this VTS in its entirety.
*/
void VTS__delete ( VTS* vts )
{
   tl_assert(vts);
   tl_assert(vts->ts);
   VG_(deleteXA)( vts->ts );
   HG_(free)(vts);
}


/* Create a new singleton VTS.
*/
VTS* VTS__singleton ( Thr* thr, ULong tym ) {
   ScalarTS st;
   VTS*     vts;
   tl_assert(thr);
   tl_assert(tym >= 1);
   vts = VTS__new();
   st.thr = thr;
   st.tym = tym;
   VG_(addToXA)( vts->ts, &st );
   return vts;
}
1652
1653
1654/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1655 not modified.
1656*/
1657VTS* VTS__tick ( Thr* me, VTS* vts )
1658{
1659 ScalarTS* here = NULL;
1660 ScalarTS tmp;
1661 VTS* res;
1662 Word i, n;
1663 tl_assert(me);
1664 tl_assert(is_sane_VTS(vts));
1665 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1666 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1667 res = VTS__new();
1668 n = VG_(sizeXA)( vts->ts );
1669
1670 /* main loop doesn't handle zero-entry case correctly, so
1671 special-case it. */
1672 if (n == 0) {
1673 tmp.thr = me;
1674 tmp.tym = 1;
1675 VG_(addToXA)( res->ts, &tmp );
1676 tl_assert(is_sane_VTS(res));
1677 return res;
1678 }
1679
1680 for (i = 0; i < n; i++) {
1681 here = VG_(indexXA)( vts->ts, i );
1682 if (me < here->thr) {
1683 /* We just went past 'me', without seeing it. */
1684 tmp.thr = me;
1685 tmp.tym = 1;
1686 VG_(addToXA)( res->ts, &tmp );
1687 tmp = *here;
1688 VG_(addToXA)( res->ts, &tmp );
1689 i++;
1690 break;
1691 }
1692 else if (me == here->thr) {
1693 tmp = *here;
1694 tmp.tym++;
1695 VG_(addToXA)( res->ts, &tmp );
1696 i++;
1697 break;
1698 }
1699 else /* me > here->thr */ {
1700 tmp = *here;
1701 VG_(addToXA)( res->ts, &tmp );
1702 }
1703 }
1704 tl_assert(i >= 0 && i <= n);
1705 if (i == n && here && here->thr < me) {
1706 tmp.thr = me;
1707 tmp.tym = 1;
1708 VG_(addToXA)( res->ts, &tmp );
1709 } else {
1710 for (/*keepgoing*/; i < n; i++) {
1711 here = VG_(indexXA)( vts->ts, i );
1712 tmp = *here;
1713 VG_(addToXA)( res->ts, &tmp );
1714 }
1715 }
1716 tl_assert(is_sane_VTS(res));
1717 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1718 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1719 return res;
1720}
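
/* Illustrative sketch only (not part of the library): the intended
   semantics of VTS__tick on a small example. example_tick and its
   arguments are hypothetical; the sketch is guarded out with #if 0,
   in the same style as the debugging switches at the top of this
   file. Assumes thrA < thrB as pointers, as the VTS ordering
   requires. */
#if 0
static void example_tick ( Thr* thrA, Thr* thrB )
{
   VTS* v0 = VTS__singleton( thrB, 7 );  /* [thrB:7] */
   VTS* v1 = VTS__tick( thrA, v0 );      /* [thrA:1, thrB:7] */
   VTS* v2 = VTS__tick( thrB, v1 );      /* [thrA:1, thrB:8] */
   tl_assert( VTS__indexAt_SLOW( v2, thrA ) == 1 );
   tl_assert( VTS__indexAt_SLOW( v2, thrB ) == 8 );
   VTS__delete(v0); VTS__delete(v1); VTS__delete(v2);
}
#endif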
1721
1722
1723/* Return a new VTS constructed as the join (max) of the 2 args.
1724 Neither arg is modified.
1725*/
1726VTS* VTS__join ( VTS* a, VTS* b )
1727{
1728 Word ia, ib, useda, usedb;
1729 ULong tyma, tymb, tymMax;
1730 Thr* thr;
1731 VTS* res;
sewardjf98e1c02008-10-25 16:22:41 +00001732
1733 tl_assert(a && a->ts);
1734 tl_assert(b && b->ts);
1735 useda = VG_(sizeXA)( a->ts );
1736 usedb = VG_(sizeXA)( b->ts );
1737
1738 res = VTS__new();
1739 ia = ib = 0;
1740
1741 while (1) {
1742
1743 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1744 from a and b in order, where thr is the next Thr*
1745 occurring in either a or b, and tyma/b are the relevant
1746 scalar timestamps, taking into account implicit zeroes. */
1747 tl_assert(ia >= 0 && ia <= useda);
1748 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001749
njn4c245e52009-03-15 23:25:38 +00001750 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001751 /* both empty - done */
1752 break;
njn4c245e52009-03-15 23:25:38 +00001753
1754 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001755 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001756 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001757 thr = tmpb->thr;
1758 tyma = 0;
1759 tymb = tmpb->tym;
1760 ib++;
njn4c245e52009-03-15 23:25:38 +00001761
1762 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001763 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001764 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001765 thr = tmpa->thr;
1766 tyma = tmpa->tym;
1767 tymb = 0;
1768 ia++;
njn4c245e52009-03-15 23:25:38 +00001769
1770 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001771 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001772 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1773 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001774 if (tmpa->thr < tmpb->thr) {
1775 /* a has the lowest unconsidered Thr* */
1776 thr = tmpa->thr;
1777 tyma = tmpa->tym;
1778 tymb = 0;
1779 ia++;
njn4c245e52009-03-15 23:25:38 +00001780 } else if (tmpa->thr > tmpb->thr) {
sewardjf98e1c02008-10-25 16:22:41 +00001781 /* b has the lowest unconsidered Thr* */
1782 thr = tmpb->thr;
1783 tyma = 0;
1784 tymb = tmpb->tym;
1785 ib++;
1786 } else {
1787 /* they both next mention the same Thr* */
1788 tl_assert(tmpa->thr == tmpb->thr);
1789 thr = tmpa->thr; /* == tmpb->thr */
1790 tyma = tmpa->tym;
1791 tymb = tmpb->tym;
1792 ia++;
1793 ib++;
1794 }
1795 }
1796
1797 /* having laboriously determined (thr, tyma, tymb), do something
1798 useful with it. */
1799 tymMax = tyma > tymb ? tyma : tymb;
1800 if (tymMax > 0) {
1801 ScalarTS st;
1802 st.thr = thr;
1803 st.tym = tymMax;
1804 VG_(addToXA)( res->ts, &st );
1805 }
1806
1807 }
1808
1809 tl_assert(is_sane_VTS( res ));
1810
1811 return res;
1812}
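
/* Illustrative sketch only: join is the pointwise maximum, with
   missing entries treated as zero. example_join and its arguments
   are hypothetical; it leaks its temporaries, which is fine for an
   illustration. Assumes thrA < thrB < thrC as pointers. */
#if 0
static void example_join ( Thr* thrA, Thr* thrB, Thr* thrC )
{
   VTS* a = VTS__join( VTS__singleton(thrA, 4), VTS__singleton(thrB, 1) );
   VTS* b = VTS__join( VTS__singleton(thrB, 3), VTS__singleton(thrC, 2) );
   VTS* j = VTS__join( a, b );  /* [thrA:4, thrB:3, thrC:2] */
   tl_assert( VTS__indexAt_SLOW( j, thrA ) == 4 );
   tl_assert( VTS__indexAt_SLOW( j, thrB ) == 3 );
   tl_assert( VTS__indexAt_SLOW( j, thrC ) == 2 );
}
#endif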
1813
1814
sewardj23f12002009-07-24 08:45:08 +00001815/* Determine if 'a' <= 'b', in the partial ordering. Returns NULL if
 1816 it is, or the first Thr* at which the ordering fails. This rather
1817 strange convention is used because sometimes we want to know the
1818 actual index at which they first differ. */
1819static Thr* VTS__cmpLEQ ( VTS* a, VTS* b )
sewardjf98e1c02008-10-25 16:22:41 +00001820{
sewardj23f12002009-07-24 08:45:08 +00001821 Word ia, ib, useda, usedb;
1822 ULong tyma, tymb;
sewardjf98e1c02008-10-25 16:22:41 +00001823
1824 tl_assert(a && a->ts);
1825 tl_assert(b && b->ts);
1826 useda = VG_(sizeXA)( a->ts );
1827 usedb = VG_(sizeXA)( b->ts );
1828
1829 ia = ib = 0;
1830
1831 while (1) {
1832
njn4c245e52009-03-15 23:25:38 +00001833 /* This logic is to enumerate doubles (tyma, tymb) drawn
 1834 from a and b in order, where tyma/b are the relevant
sewardjf98e1c02008-10-25 16:22:41 +00001835 scalar timestamps, taking into account implicit zeroes. */
sewardj23f12002009-07-24 08:45:08 +00001836 Thr* thr;
1837
sewardjf98e1c02008-10-25 16:22:41 +00001838 tl_assert(ia >= 0 && ia <= useda);
1839 tl_assert(ib >= 0 && ib <= usedb);
sewardjf98e1c02008-10-25 16:22:41 +00001840
njn4c245e52009-03-15 23:25:38 +00001841 if (ia == useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001842 /* both empty - done */
1843 break;
njn4c245e52009-03-15 23:25:38 +00001844
1845 } else if (ia == useda && ib != usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001846 /* a empty, use up b */
njn4c245e52009-03-15 23:25:38 +00001847 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001848 tyma = 0;
1849 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001850 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001851 ib++;
njn4c245e52009-03-15 23:25:38 +00001852
1853 } else if (ia != useda && ib == usedb) {
sewardjf98e1c02008-10-25 16:22:41 +00001854 /* b empty, use up a */
njn4c245e52009-03-15 23:25:38 +00001855 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
sewardjf98e1c02008-10-25 16:22:41 +00001856 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001857 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001858 tymb = 0;
1859 ia++;
njn4c245e52009-03-15 23:25:38 +00001860
1861 } else {
sewardjf98e1c02008-10-25 16:22:41 +00001862 /* both not empty; extract lowest-Thr*'d triple */
njn4c245e52009-03-15 23:25:38 +00001863 ScalarTS* tmpa = VG_(indexXA)( a->ts, ia );
1864 ScalarTS* tmpb = VG_(indexXA)( b->ts, ib );
sewardjf98e1c02008-10-25 16:22:41 +00001865 if (tmpa->thr < tmpb->thr) {
1866 /* a has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001867 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001868 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001869 tymb = 0;
1870 ia++;
1871 }
1872 else
1873 if (tmpa->thr > tmpb->thr) {
1874 /* b has the lowest unconsidered Thr* */
sewardjf98e1c02008-10-25 16:22:41 +00001875 tyma = 0;
1876 tymb = tmpb->tym;
sewardj23f12002009-07-24 08:45:08 +00001877 thr = tmpb->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001878 ib++;
1879 } else {
1880 /* they both next mention the same Thr* */
1881 tl_assert(tmpa->thr == tmpb->thr);
sewardjf98e1c02008-10-25 16:22:41 +00001882 tyma = tmpa->tym;
sewardj23f12002009-07-24 08:45:08 +00001883 thr = tmpa->thr;
sewardjf98e1c02008-10-25 16:22:41 +00001884 tymb = tmpb->tym;
1885 ia++;
1886 ib++;
1887 }
1888 }
1889
njn4c245e52009-03-15 23:25:38 +00001890 /* having laboriously determined (tyma, tymb), do something
sewardjf98e1c02008-10-25 16:22:41 +00001891 useful with it. */
sewardj23f12002009-07-24 08:45:08 +00001892 if (tyma > tymb) {
1893 /* not LEQ at this index. Quit, since the answer is
1894 determined already. */
1895 tl_assert(thr);
1896 return thr;
1897 }
sewardjf98e1c02008-10-25 16:22:41 +00001898 }
1899
sewardj23f12002009-07-24 08:45:08 +00001900 return NULL; /* all points are LEQ */
sewardjf98e1c02008-10-25 16:22:41 +00001901}
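
/* Illustrative sketch only: LEQ holds iff every per-thread entry of
   'a' is <= the corresponding entry of 'b', with missing entries
   counting as zero. example_cmpLEQ and its arguments are
   hypothetical. */
#if 0
static void example_cmpLEQ ( Thr* thrA, Thr* thrB )
{
   VTS* a = VTS__singleton( thrA, 2 );              /* [thrA:2] */
   VTS* b = VTS__tick( thrB, VTS__tick(thrA, a) );  /* [thrA:3, thrB:1] */
   tl_assert( VTS__cmpLEQ(a, b) == NULL );  /* a <= b pointwise */
   tl_assert( VTS__cmpLEQ(b, a) == thrA );  /* fails at thrA: 3 > 2 */
}
#endif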
1902
1903
1904/* Compute an arbitrary structural (total) ordering on the two args,
1905 based on their VCs, so they can be looked up in a table, tree, etc.
1906 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1907*/
1908Word VTS__cmp_structural ( VTS* a, VTS* b )
1909{
1910 /* We just need to generate an arbitrary total ordering based on
 1911 a->ts and b->ts. Preferably do it in a way which encounters likely
 1912 differences quickly. */
1913 Word i, useda, usedb;
1914 ScalarTS *tmpa, *tmpb;
1915
1916 tl_assert(a && a->ts);
1917 tl_assert(b && b->ts);
1918 useda = VG_(sizeXA)( a->ts );
1919 usedb = VG_(sizeXA)( b->ts );
1920
1921 if (useda < usedb) return -1;
1922 if (useda > usedb) return 1;
1923
1924 /* Same length vectors, so let's step through them together. */
1925 tl_assert(useda == usedb);
1926 for (i = 0; i < useda; i++) {
1927 tmpa = VG_(indexXA)( a->ts, i );
1928 tmpb = VG_(indexXA)( b->ts, i );
1929 if (tmpa->tym < tmpb->tym) return -1;
1930 if (tmpa->tym > tmpb->tym) return 1;
1931 if (tmpa->thr < tmpb->thr) return -1;
1932 if (tmpa->thr > tmpb->thr) return 1;
1933 }
1934
1935 /* They're identical. */
1936 return 0;
1937}
1938
1939
1940/* Debugging only. Display the given VTS in the buffer.
1941*/
1942void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1943 ScalarTS* st;
1944 HChar unit[64];
1945 Word i, n;
1946 Int avail = nBuf;
1947 tl_assert(vts && vts->ts);
1948 tl_assert(nBuf > 16);
1949 buf[0] = '[';
1950 buf[1] = 0;
1951 n = VG_(sizeXA)( vts->ts );
1952 for (i = 0; i < n; i++) {
1953 tl_assert(avail >= 40);
1954 st = VG_(indexXA)( vts->ts, i );
1955 VG_(memset)(unit, 0, sizeof(unit));
1956 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1957 st->thr, st->tym);
1958 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1959 VG_(strcat)(buf, " ...]");
1960 buf[nBuf-1] = 0;
1961 return;
1962 }
1963 VG_(strcat)(buf, unit);
1964 avail -= VG_(strlen)(unit);
1965 }
1966 VG_(strcat)(buf, "]");
1967 buf[nBuf-1] = 0;
1968}
1969
1970
1971/* Debugging only. Return vts[index], so to speak.
1972*/
1973ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1974 UWord i, n;
1975 tl_assert(vts && vts->ts);
1976 n = VG_(sizeXA)( vts->ts );
1977 for (i = 0; i < n; i++) {
1978 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1979 if (st->thr == idx)
1980 return st->tym;
1981 }
1982 return 0;
1983}
1984
1985
1986/////////////////////////////////////////////////////////////////
1987/////////////////////////////////////////////////////////////////
1988// //
1989// SECTION END vts primitives //
1990// //
1991/////////////////////////////////////////////////////////////////
1992/////////////////////////////////////////////////////////////////
1993
1994
1995
1996/////////////////////////////////////////////////////////////////
1997/////////////////////////////////////////////////////////////////
1998// //
1999// SECTION BEGIN main library //
2000// //
2001/////////////////////////////////////////////////////////////////
2002/////////////////////////////////////////////////////////////////
2003
2004
2005/////////////////////////////////////////////////////////
2006// //
2007// VTS set //
2008// //
2009/////////////////////////////////////////////////////////
2010
2011static WordFM* /* VTS* void void */ vts_set = NULL;
2012
2013static void vts_set_init ( void )
2014{
2015 tl_assert(!vts_set);
2016 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2017 HG_(free),
2018 (Word(*)(UWord,UWord))VTS__cmp_structural );
2019 tl_assert(vts_set);
2020}
2021
2022/* Given a newly made VTS, look in vts_set to see if we already have
2023 an identical one. If yes, free up this one and return instead a
2024 pointer to the existing one. If no, add this one to the set and
2025 return the same pointer. Caller differentiates the two cases by
2026 comparing returned pointer with the supplied one (although that
2027 does require that the supplied VTS is not already in the set).
2028*/
2029static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2030{
2031 UWord keyW, valW;
2032 /* lookup cand (by value) */
2033 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2034 /* found it */
2035 tl_assert(valW == 0);
2036 /* if this fails, cand (by ref) was already present (!) */
2037 tl_assert(keyW != (UWord)cand);
2038 VTS__delete(cand);
2039 return (VTS*)keyW;
2040 } else {
2041 /* not present. Add and return pointer to same. */
2042 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2043 return cand;
2044 }
2045}
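
/* Illustrative sketch only: how a caller distinguishes the two
   outcomes described above. example_intern is hypothetical. */
#if 0
static VTS* example_intern ( Thr* thr )
{
   VTS* cand = VTS__singleton( thr, 1 );  /* fresh, not yet in the set */
   VTS* res  = vts_set__find_and_dealloc__or_add( cand );
   if (res == cand) {
      /* 'cand' was new and is now owned by vts_set. */
   } else {
      /* an identical VTS already existed; 'cand' has been freed and
         'res' points at the pre-existing copy. */
   }
   return res;
}
#endif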
2046
2047
2048/////////////////////////////////////////////////////////
2049// //
2050// VTS table //
2051// //
2052/////////////////////////////////////////////////////////
2053
2054static void VtsID__invalidate_caches ( void ); /* fwds */
2055
2056/* A type to hold VTS table entries. Invariants:
2057 If .vts == NULL, then this entry is not in use, so:
2058 - .rc == 0
2059 - this entry is on the freelist (unfortunately, does not imply
 2060 any constraints on value for .freelink)
2061 If .vts != NULL, then this entry is in use:
2062 - .vts is findable in vts_set
2063 - .vts->id == this entry number
2064 - no specific value for .rc (even 0 is OK)
 2065 - this entry is not on freelist, so .freelink == VtsID_INVALID
2066*/
2067typedef
2068 struct {
2069 VTS* vts; /* vts, in vts_set */
2070 UWord rc; /* reference count - enough for entire aspace */
2071 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2072 }
2073 VtsTE;
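
/* Illustrative sketch only: the invariants above, restated as a
   checker. is_sane_VtsTE is hypothetical; the real code asserts
   these facts piecemeal at the points of use. */
#if 0
static Bool is_sane_VtsTE ( VtsTE* te, VtsID ii )
{
   if (te->vts == NULL) {
      /* not in use: rc must be 0; freelink is unconstrained */
      return te->rc == 0;
   } else {
      /* in use: must self-identify and be off the freelist */
      return te->vts->id == ii && te->freelink == VtsID_INVALID;
   }
}
#endif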
2074
2075/* The VTS table. */
2076static XArray* /* of VtsTE */ vts_tab = NULL;
2077
2078/* An index into the VTS table, indicating the start of the list of
2079 free (available for use) entries. If the list is empty, this is
2080 VtsID_INVALID. */
2081static VtsID vts_tab_freelist = VtsID_INVALID;
2082
2083/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2084 vts_tab equals or exceeds this size. After GC, the value here is
2085 set appropriately so as to check for the next GC point. */
2086static Word vts_next_GC_at = 1000;
2087
2088static void vts_tab_init ( void )
2089{
2090 vts_tab
2091 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2092 HG_(free), sizeof(VtsTE) );
2093 vts_tab_freelist
2094 = VtsID_INVALID;
2095 tl_assert(vts_tab);
2096}
2097
2098/* Add ii to the free list, checking that it looks out-of-use. */
2099static void add_to_free_list ( VtsID ii )
2100{
2101 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2102 tl_assert(ie->vts == NULL);
2103 tl_assert(ie->rc == 0);
2104 tl_assert(ie->freelink == VtsID_INVALID);
2105 ie->freelink = vts_tab_freelist;
2106 vts_tab_freelist = ii;
2107}
2108
2109/* Get an entry from the free list. This will return VtsID_INVALID if
2110 the free list is empty. */
2111static VtsID get_from_free_list ( void )
2112{
2113 VtsID ii;
2114 VtsTE* ie;
2115 if (vts_tab_freelist == VtsID_INVALID)
2116 return VtsID_INVALID;
2117 ii = vts_tab_freelist;
2118 ie = VG_(indexXA)( vts_tab, ii );
2119 tl_assert(ie->vts == NULL);
2120 tl_assert(ie->rc == 0);
2121 vts_tab_freelist = ie->freelink;
2122 return ii;
2123}
2124
2125/* Produce a new VtsID that can be used, either by getting it from
2126 the freelist, or, if that is empty, by expanding vts_tab. */
2127static VtsID get_new_VtsID ( void )
2128{
2129 VtsID ii;
2130 VtsTE te;
2131 ii = get_from_free_list();
2132 if (ii != VtsID_INVALID)
2133 return ii;
2134 te.vts = NULL;
2135 te.rc = 0;
2136 te.freelink = VtsID_INVALID;
2137 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2138 return ii;
2139}
2140
2141
2142/* Indirect callback from lib_zsm. */
2143static void VtsID__rcinc ( VtsID ii )
2144{
2145 VtsTE* ie;
2146 /* VG_(indexXA) does a range check for us */
2147 ie = VG_(indexXA)( vts_tab, ii );
2148 tl_assert(ie->vts); /* else it's not in use */
2149 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2150 tl_assert(ie->vts->id == ii);
2151 ie->rc++;
2152}
2153
2154/* Indirect callback from lib_zsm. */
2155static void VtsID__rcdec ( VtsID ii )
2156{
2157 VtsTE* ie;
2158 /* VG_(indexXA) does a range check for us */
2159 ie = VG_(indexXA)( vts_tab, ii );
2160 tl_assert(ie->vts); /* else it's not in use */
2161 tl_assert(ie->rc > 0); /* else RC snafu */
2162 tl_assert(ie->vts->id == ii);
2163 ie->rc--;
2164}
2165
2166
2167/* Look up 'cand' in our collection of VTSs. If present, deallocate
2168 it and return the VtsID for the pre-existing version. If not
2169 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2170 for it, and return that. */
2171static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2172{
2173 VTS* auld;
2174 tl_assert(cand->id == VtsID_INVALID);
2175 auld = vts_set__find_and_dealloc__or_add(cand);
2176 if (auld != cand) {
2177 /* We already have an Aulde one. Use that. */
2178 VtsTE* ie;
2179 tl_assert(auld->id != VtsID_INVALID);
2180 ie = VG_(indexXA)( vts_tab, auld->id );
2181 tl_assert(ie->vts == auld);
2182 return auld->id;
2183 } else {
2184 VtsID ii = get_new_VtsID();
2185 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2186 ie->vts = cand;
2187 ie->rc = 0;
2188 ie->freelink = VtsID_INVALID;
2189 cand->id = ii;
2190 return ii;
2191 }
2192}
2193
2194
2195static void show_vts_stats ( HChar* caller )
2196{
2197 UWord nSet, nTab, nLive;
2198 ULong totrc;
2199 UWord n, i;
2200 nSet = VG_(sizeFM)( vts_set );
2201 nTab = VG_(sizeXA)( vts_tab );
2202 totrc = 0;
2203 nLive = 0;
2204 n = VG_(sizeXA)( vts_tab );
2205 for (i = 0; i < n; i++) {
2206 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2207 if (ie->vts) {
2208 nLive++;
2209 totrc += (ULong)ie->rc;
2210 } else {
2211 tl_assert(ie->rc == 0);
2212 }
2213 }
2214 VG_(printf)(" show_vts_stats %s\n", caller);
2215 VG_(printf)(" vts_tab size %4lu\n", nTab);
2216 VG_(printf)(" vts_tab live %4lu\n", nLive);
2217 VG_(printf)(" vts_set size %4lu\n", nSet);
2218 VG_(printf)(" total rc %4llu\n", totrc);
2219}
2220
2221/* NOT TO BE CALLED FROM WITHIN libzsm. */
sewardj8fd92d32008-11-20 23:17:01 +00002222__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00002223static void vts_tab__do_GC ( Bool show_stats )
2224{
2225 UWord i, nTab, nLive, nFreed;
2226
2227 /* check this is actually necessary. */
2228 tl_assert(vts_tab_freelist == VtsID_INVALID);
2229
2230 /* empty the caches for partial order checks and binary joins. We
2231 could do better and prune out the entries to be deleted, but it
2232 ain't worth the hassle. */
2233 VtsID__invalidate_caches();
2234
2235 /* First, make the reference counts up to date. */
2236 zsm_flush_cache();
2237
2238 nTab = VG_(sizeXA)( vts_tab );
2239
2240 if (show_stats) {
2241 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2242 show_vts_stats("before GC");
2243 }
2244
2245 /* Now we can inspect the entire vts_tab. Any entries
 2246 with zero .rc fields are no longer in use and can be put back on
 2247 the free list, removed from vts_set, and deleted. */
2248 nFreed = 0;
2249 for (i = 0; i < nTab; i++) {
2250 Bool present;
2251 UWord oldK = 0, oldV = 0;
2252 VtsTE* te = VG_(indexXA)( vts_tab, i );
2253 if (te->vts == NULL) {
2254 tl_assert(te->rc == 0);
2255 continue; /* already on the free list (presumably) */
2256 }
2257 if (te->rc > 0)
2258 continue; /* in use */
2259 /* Ok, we got one we can free. */
2260 tl_assert(te->vts->id == i);
2261 /* first, remove it from vts_set. */
2262 present = VG_(delFromFM)( vts_set,
2263 &oldK, &oldV, (UWord)te->vts );
2264 tl_assert(present); /* else it isn't in vts_set ?! */
2265 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2266 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2267 /* now free the VTS itself */
2268 VTS__delete(te->vts);
2269 te->vts = NULL;
2270 /* and finally put this entry on the free list */
2271 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2272 add_to_free_list( i );
2273 nFreed++;
2274 }
2275
2276 /* Now figure out when the next GC should be. We'll allow the
2277 number of VTSs to double before GCing again. Except of course
2278 that since we can't (or, at least, don't) shrink vts_tab, we
 2279 can't set the threshold value smaller than it. */
2280 tl_assert(nFreed <= nTab);
2281 nLive = nTab - nFreed;
2282 tl_assert(nLive >= 0 && nLive <= nTab);
2283 vts_next_GC_at = 2 * nLive;
2284 if (vts_next_GC_at < nTab)
2285 vts_next_GC_at = nTab;
2286
2287 if (show_stats) {
2288 show_vts_stats("after GC");
2289 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2290 }
2291
sewardjd024ae52008-11-09 20:47:57 +00002292 if (VG_(clo_verbosity) > 1) {
sewardjf98e1c02008-10-25 16:22:41 +00002293 static UInt ctr = 0;
2294 tl_assert(nTab > 0);
sewardjd024ae52008-11-09 20:47:57 +00002295 VG_(message)(Vg_DebugMsg,
sewardj24118492009-07-15 14:50:02 +00002296 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)\n",
sewardj8aa41de2009-01-22 12:24:26 +00002297 ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
sewardjf98e1c02008-10-25 16:22:41 +00002298 }
2299}
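
/* Illustrative sketch only: the next-GC threshold policy above, on
   concrete numbers. example_next_GC_at is hypothetical. */
#if 0
static Word example_next_GC_at ( Word nLive, Word nTab )
{
   Word next = 2 * nLive;         /* let the live set double ... */
   if (next < nTab) next = nTab;  /* ... but never go below nTab */
   return next;  /* e.g. nLive 600, nTab 1000 ==> next GC at 1200 */
}
#endif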
2300
2301
2302/////////////////////////////////////////////////////////
2303// //
2304// Vts IDs //
2305// //
2306/////////////////////////////////////////////////////////
2307
2308//////////////////////////
sewardj23f12002009-07-24 08:45:08 +00002309static ULong stats__cmpLEQ_queries = 0;
2310static ULong stats__cmpLEQ_misses = 0;
2311static ULong stats__join2_queries = 0;
2312static ULong stats__join2_misses = 0;
sewardjf98e1c02008-10-25 16:22:41 +00002313
2314static inline UInt ROL32 ( UInt w, Int n ) {
2315 w = (w << n) | (w >> (32-n));
2316 return w;
2317}
2318static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2319 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2320 return hash % nTab;
2321}
2322
sewardj23f12002009-07-24 08:45:08 +00002323#define N_CMPLEQ_CACHE 1023
sewardjf98e1c02008-10-25 16:22:41 +00002324static
sewardj23f12002009-07-24 08:45:08 +00002325 struct { VtsID vi1; VtsID vi2; Bool leq; }
2326 cmpLEQ_cache[N_CMPLEQ_CACHE];
sewardjf98e1c02008-10-25 16:22:41 +00002327
2328#define N_JOIN2_CACHE 1023
2329static
2330 struct { VtsID vi1; VtsID vi2; VtsID res; }
2331 join2_cache[N_JOIN2_CACHE];
2332
2333static void VtsID__invalidate_caches ( void ) {
2334 Int i;
sewardj23f12002009-07-24 08:45:08 +00002335 for (i = 0; i < N_CMPLEQ_CACHE; i++) {
2336 cmpLEQ_cache[i].vi1 = VtsID_INVALID;
2337 cmpLEQ_cache[i].vi2 = VtsID_INVALID;
2338 cmpLEQ_cache[i].leq = False;
sewardjf98e1c02008-10-25 16:22:41 +00002339 }
2340 for (i = 0; i < N_JOIN2_CACHE; i++) {
2341 join2_cache[i].vi1 = VtsID_INVALID;
2342 join2_cache[i].vi2 = VtsID_INVALID;
2343 join2_cache[i].res = VtsID_INVALID;
2344 }
2345}
2346//////////////////////////
2347
sewardjd52392d2008-11-08 20:36:26 +00002348//static Bool VtsID__is_valid ( VtsID vi ) {
2349// VtsTE* ve;
2350// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2351// return False;
2352// ve = VG_(indexXA)( vts_tab, vi );
2353// if (!ve->vts)
2354// return False;
2355// tl_assert(ve->vts->id == vi);
2356// return True;
2357//}
sewardjf98e1c02008-10-25 16:22:41 +00002358
2359static VTS* VtsID__to_VTS ( VtsID vi ) {
2360 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2361 tl_assert(te->vts);
2362 return te->vts;
2363}
2364
2365static void VtsID__pp ( VtsID vi ) {
2366 HChar buf[100];
2367 VTS* vts = VtsID__to_VTS(vi);
2368 VTS__show( buf, sizeof(buf)-1, vts );
2369 buf[sizeof(buf)-1] = 0;
2370 VG_(printf)("%s", buf);
2371}
2372
2373/* compute partial ordering relation of vi1 and vi2. */
2374__attribute__((noinline))
sewardj23f12002009-07-24 08:45:08 +00002375static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
sewardjf98e1c02008-10-25 16:22:41 +00002376 UInt hash;
sewardj23f12002009-07-24 08:45:08 +00002377 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00002378 VTS *v1, *v2;
sewardj23f12002009-07-24 08:45:08 +00002379 //if (vi1 == vi2) return True;
sewardjf98e1c02008-10-25 16:22:41 +00002380 tl_assert(vi1 != vi2);
2381 ////++
sewardj23f12002009-07-24 08:45:08 +00002382 stats__cmpLEQ_queries++;
2383 hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
2384 if (cmpLEQ_cache[hash].vi1 == vi1
2385 && cmpLEQ_cache[hash].vi2 == vi2)
2386 return cmpLEQ_cache[hash].leq;
2387 stats__cmpLEQ_misses++;
sewardjf98e1c02008-10-25 16:22:41 +00002388 ////--
2389 v1 = VtsID__to_VTS(vi1);
2390 v2 = VtsID__to_VTS(vi2);
sewardj23f12002009-07-24 08:45:08 +00002391 leq = VTS__cmpLEQ( v1, v2 ) == NULL;
sewardjf98e1c02008-10-25 16:22:41 +00002392 ////++
sewardj23f12002009-07-24 08:45:08 +00002393 cmpLEQ_cache[hash].vi1 = vi1;
2394 cmpLEQ_cache[hash].vi2 = vi2;
2395 cmpLEQ_cache[hash].leq = leq;
sewardjf98e1c02008-10-25 16:22:41 +00002396 ////--
sewardj23f12002009-07-24 08:45:08 +00002397 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00002398}
sewardj23f12002009-07-24 08:45:08 +00002399static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
2400 return LIKELY(vi1 == vi2) ? True : VtsID__cmpLEQ_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002401}
2402
2403/* compute binary join */
2404__attribute__((noinline))
2405static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2406 UInt hash;
2407 VtsID res;
2408 VTS *vts1, *vts2, *nyu;
2409 //if (vi1 == vi2) return vi1;
2410 tl_assert(vi1 != vi2);
2411 ////++
2412 stats__join2_queries++;
2413 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2414 if (join2_cache[hash].vi1 == vi1
2415 && join2_cache[hash].vi2 == vi2)
2416 return join2_cache[hash].res;
2417 stats__join2_misses++;
2418 ////--
2419 vts1 = VtsID__to_VTS(vi1);
2420 vts2 = VtsID__to_VTS(vi2);
2421 nyu = VTS__join(vts1,vts2);
2422 res = vts_tab__find_and_dealloc__or_add(nyu);
2423 ////++
2424 join2_cache[hash].vi1 = vi1;
2425 join2_cache[hash].vi2 = vi2;
2426 join2_cache[hash].res = res;
2427 ////--
2428 return res;
2429}
2430static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00002431 return LIKELY(vi1 == vi2) ? vi1 : VtsID__join2_WRK(vi1, vi2);
sewardjf98e1c02008-10-25 16:22:41 +00002432}
2433
 2434/* create a singleton VTS, namely [thr:tym] */
2435static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2436 VTS* nyu = VTS__singleton(thr,tym);
2437 return vts_tab__find_and_dealloc__or_add(nyu);
2438}
2439
2440/* tick operation, creates value 1 if specified index is absent */
2441static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2442 VTS* vts = VtsID__to_VTS(vi);
2443 VTS* nyu = VTS__tick(idx,vts);
2444 return vts_tab__find_and_dealloc__or_add(nyu);
2445}
2446
2447/* index into a VTS (only for assertions) */
2448static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2449 VTS* vts = VtsID__to_VTS(vi);
2450 return VTS__indexAt_SLOW( vts, idx );
2451}
2452
sewardj23f12002009-07-24 08:45:08 +00002453/* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
2454 any, really) element in vi1 which is pointwise greater-than the
 2455 corresponding element in vi2. Given the assumption, such an element
 2456 must exist. This needs to be fairly quick since it is called every time
2457 a race is detected. */
2458static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
2459{
2460 VTS *vts1, *vts2;
2461 Thr* diffthr;
2462 tl_assert(vi1 != vi2);
2463 vts1 = VtsID__to_VTS(vi1);
2464 vts2 = VtsID__to_VTS(vi2);
2465 tl_assert(vts1 != vts2);
2466 diffthr = VTS__cmpLEQ(vts1, vts2);
2467 tl_assert(diffthr); /* else they are LEQ ! */
2468 return diffthr;
2469}
2470
2471
2472/////////////////////////////////////////////////////////
2473// //
2474// Filters //
2475// //
2476/////////////////////////////////////////////////////////
2477
2478// baseline: 5, 9
2479#define FI_LINE_SZB_LOG2 5
2480#define FI_NUM_LINES_LOG2 10
2481
2482#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
2483#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
2484
2485#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
2486#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
2487
2488#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
2489 & (Addr)(FI_NUM_LINES-1) )
2490
2491
2492/* In the lines, each 8 bytes are treated individually, and are mapped
2493 to a UShort. Regardless of endianness of the underlying machine,
2494 bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
2495 the highest address.
2496
 2497 Of each bit pair, the higher-numbered bit is set if an R has been
 2498 seen, the lower if a W has been seen, so the actual layout is:
2499
2500 15 14 ... 01 00
2501
2502 R W for addr+7 ... R W for addr+0
2503
2504 So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
2505*/
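
/* Illustrative sketch only: decomposing a concrete address with the
   macros above, given FI_LINE_SZB == 32. The values in the comments
   are hand-computed. example_filter_geometry is hypothetical. */
#if 0
static void example_filter_geometry ( void )
{
   Addr   a      = (Addr)0x1003;
   Addr   atag   = FI_GET_TAG(a);        /* 0x1000: line base addr */
   UWord  lineno = FI_GET_LINENO(a);     /* (0x1003 >> 5) & 1023 == 128 */
   UWord  loff   = (a - atag) / 8;       /* 0: first UShort of the line */
   UShort mask   = 0x2 << (2 * (a & 7)); /* R bit for byte 3 == 0x80 */
   tl_assert(atag   == 0x1000);
   tl_assert(lineno == 128);
   tl_assert(loff   == 0);
   tl_assert(mask   == 0x80);
}
#endif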
2506
2507/* tags are separated from lines. tags are Addrs and are
2508 the base address of the line. */
2509typedef
2510 struct {
2511 UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
2512 }
2513 FiLine;
2514
2515typedef
2516 struct {
2517 Addr tags[FI_NUM_LINES];
2518 FiLine lines[FI_NUM_LINES];
2519 }
2520 Filter;
2521
2522/* Forget everything we know -- clear the filter and let everything
2523 through. This needs to be as fast as possible, since it is called
2524 every time the running thread changes, and every time a thread's
2525 vector clocks change, which can be quite frequent. The obvious
2526 fast way to do this is simply to stuff in tags which we know are
2527 not going to match anything, since they're not aligned to the start
2528 of a line. */
2529static void Filter__clear ( Filter* fi, HChar* who )
2530{
2531 UWord i;
2532 if (0) VG_(printf)(" Filter__clear(%p, %s)\n", fi, who);
2533 for (i = 0; i < FI_NUM_LINES; i += 8) {
2534 fi->tags[i+0] = 1; /* impossible value -- cannot match */
2535 fi->tags[i+1] = 1;
2536 fi->tags[i+2] = 1;
2537 fi->tags[i+3] = 1;
2538 fi->tags[i+4] = 1;
2539 fi->tags[i+5] = 1;
2540 fi->tags[i+6] = 1;
2541 fi->tags[i+7] = 1;
2542 }
2543 tl_assert(i == FI_NUM_LINES);
2544}
2545
2546/* Clearing an arbitrary range in the filter. Unfortunately
2547 we have to do this due to core-supplied new/die-mem events. */
2548
2549static void Filter__clear_1byte ( Filter* fi, Addr a )
2550{
2551 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2552 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2553 FiLine* line = &fi->lines[lineno];
2554 UWord loff = (a - atag) / 8;
2555 UShort mask = 0x3 << (2 * (a & 7));
2556 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2557 if (LIKELY( fi->tags[lineno] == atag )) {
2558 /* hit. clear the bits. */
2559 UShort u16 = line->u16s[loff];
2560 line->u16s[loff] = u16 & ~mask; /* clear them */
2561 } else {
2562 /* miss. The filter doesn't hold this address, so ignore. */
2563 }
2564}
2565
2566static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
2567{
2568 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2569 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2570 FiLine* line = &fi->lines[lineno];
2571 UWord loff = (a - atag) / 8;
2572 if (LIKELY( fi->tags[lineno] == atag )) {
2573 line->u16s[loff] = 0;
2574 } else {
2575 /* miss. The filter doesn't hold this address, so ignore. */
2576 }
2577}
2578
2579static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
2580{
2581 //VG_(printf)("%lu ", len);
2582 /* slowly do part preceding 8-alignment */
2583 while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
2584 Filter__clear_1byte( fi, a );
2585 a++;
2586 len--;
2587 }
2588 /* vector loop */
2589 while (len >= 8) {
2590 Filter__clear_8bytes_aligned( fi, a );
2591 a += 8;
2592 len -= 8;
2593 }
2594 /* slowly do tail */
2595 while (UNLIKELY(len > 0)) {
2596 Filter__clear_1byte( fi, a );
2597 a++;
2598 len--;
2599 }
2600}
2601
2602
2603/* ------ Read handlers for the filter. ------ */
2604
2605static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
2606{
2607 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2608 return False;
2609 {
2610 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2611 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2612 FiLine* line = &fi->lines[lineno];
2613 UWord loff = (a - atag) / 8;
2614 UShort mask = 0xAAAA;
2615 if (LIKELY( fi->tags[lineno] == atag )) {
2616 /* hit. check line and update. */
2617 UShort u16 = line->u16s[loff];
2618 Bool ok = (u16 & mask) == mask; /* all R bits set? */
2619 line->u16s[loff] = u16 | mask; /* set them */
2620 return ok;
2621 } else {
2622 /* miss. nuke existing line and re-use it. */
2623 UWord i;
2624 fi->tags[lineno] = atag;
2625 for (i = 0; i < FI_LINE_SZB / 8; i++)
2626 line->u16s[i] = 0;
2627 line->u16s[loff] = mask;
2628 return False;
2629 }
2630 }
2631}
2632
2633static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
2634{
2635 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2636 return False;
2637 {
2638 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2639 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2640 FiLine* line = &fi->lines[lineno];
2641 UWord loff = (a - atag) / 8;
2642 UShort mask = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
2643 if (LIKELY( fi->tags[lineno] == atag )) {
2644 /* hit. check line and update. */
2645 UShort u16 = line->u16s[loff];
2646 Bool ok = (u16 & mask) == mask; /* 4 x R bits set? */
2647 line->u16s[loff] = u16 | mask; /* set them */
2648 return ok;
2649 } else {
2650 /* miss. nuke existing line and re-use it. */
2651 UWord i;
2652 fi->tags[lineno] = atag;
2653 for (i = 0; i < FI_LINE_SZB / 8; i++)
2654 line->u16s[i] = 0;
2655 line->u16s[loff] = mask;
2656 return False;
2657 }
2658 }
2659}
2660
2661static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
2662{
2663 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2664 return False;
2665 {
2666 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2667 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2668 FiLine* line = &fi->lines[lineno];
2669 UWord loff = (a - atag) / 8;
2670 UShort mask = 0xA << (2 * (a & 6));
2671 /* mask is A000, 0A00, 00A0 or 000A */
2672 if (LIKELY( fi->tags[lineno] == atag )) {
2673 /* hit. check line and update. */
2674 UShort u16 = line->u16s[loff];
2675 Bool ok = (u16 & mask) == mask; /* 2 x R bits set? */
2676 line->u16s[loff] = u16 | mask; /* set them */
2677 return ok;
2678 } else {
2679 /* miss. nuke existing line and re-use it. */
2680 UWord i;
2681 fi->tags[lineno] = atag;
2682 for (i = 0; i < FI_LINE_SZB / 8; i++)
2683 line->u16s[i] = 0;
2684 line->u16s[loff] = mask;
2685 return False;
2686 }
2687 }
2688}
2689
2690static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
2691{
2692 {
2693 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2694 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2695 FiLine* line = &fi->lines[lineno];
2696 UWord loff = (a - atag) / 8;
2697 UShort mask = 0x2 << (2 * (a & 7));
2698 /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
2699 if (LIKELY( fi->tags[lineno] == atag )) {
2700 /* hit. check line and update. */
2701 UShort u16 = line->u16s[loff];
 2702 Bool ok = (u16 & mask) == mask; /* 1 x R bit set? */
2703 line->u16s[loff] = u16 | mask; /* set them */
2704 return ok;
2705 } else {
2706 /* miss. nuke existing line and re-use it. */
2707 UWord i;
2708 fi->tags[lineno] = atag;
2709 for (i = 0; i < FI_LINE_SZB / 8; i++)
2710 line->u16s[i] = 0;
2711 line->u16s[loff] = mask;
2712 return False;
2713 }
2714 }
2715}
2716
2717
2718/* ------ Write handlers for the filter. ------ */
2719
2720static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
2721{
2722 if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
2723 return False;
2724 {
2725 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2726 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2727 FiLine* line = &fi->lines[lineno];
2728 UWord loff = (a - atag) / 8;
2729 UShort mask = 0xFFFF;
2730 if (LIKELY( fi->tags[lineno] == atag )) {
2731 /* hit. check line and update. */
2732 UShort u16 = line->u16s[loff];
2733 Bool ok = (u16 & mask) == mask; /* all R & W bits set? */
2734 line->u16s[loff] = u16 | mask; /* set them */
2735 return ok;
2736 } else {
2737 /* miss. nuke existing line and re-use it. */
2738 UWord i;
2739 fi->tags[lineno] = atag;
2740 for (i = 0; i < FI_LINE_SZB / 8; i++)
2741 line->u16s[i] = 0;
2742 line->u16s[loff] = mask;
2743 return False;
2744 }
2745 }
2746}
2747
2748static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
2749{
2750 if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
2751 return False;
2752 {
2753 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2754 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2755 FiLine* line = &fi->lines[lineno];
2756 UWord loff = (a - atag) / 8;
2757 UShort mask = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
2758 if (LIKELY( fi->tags[lineno] == atag )) {
2759 /* hit. check line and update. */
2760 UShort u16 = line->u16s[loff];
2761 Bool ok = (u16 & mask) == mask; /* 4 x R & W bits set? */
2762 line->u16s[loff] = u16 | mask; /* set them */
2763 return ok;
2764 } else {
2765 /* miss. nuke existing line and re-use it. */
2766 UWord i;
2767 fi->tags[lineno] = atag;
2768 for (i = 0; i < FI_LINE_SZB / 8; i++)
2769 line->u16s[i] = 0;
2770 line->u16s[loff] = mask;
2771 return False;
2772 }
2773 }
2774}
2775
2776static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
2777{
2778 if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
2779 return False;
2780 {
2781 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2782 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2783 FiLine* line = &fi->lines[lineno];
2784 UWord loff = (a - atag) / 8;
2785 UShort mask = 0xF << (2 * (a & 6));
2786 /* mask is F000, 0F00, 00F0 or 000F */
2787 if (LIKELY( fi->tags[lineno] == atag )) {
2788 /* hit. check line and update. */
2789 UShort u16 = line->u16s[loff];
2790 Bool ok = (u16 & mask) == mask; /* 2 x R & W bits set? */
2791 line->u16s[loff] = u16 | mask; /* set them */
2792 return ok;
2793 } else {
2794 /* miss. nuke existing line and re-use it. */
2795 UWord i;
2796 fi->tags[lineno] = atag;
2797 for (i = 0; i < FI_LINE_SZB / 8; i++)
2798 line->u16s[i] = 0;
2799 line->u16s[loff] = mask;
2800 return False;
2801 }
2802 }
2803}
2804
2805static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
2806{
2807 {
2808 Addr atag = FI_GET_TAG(a); /* tag of 'a' */
2809 UWord lineno = FI_GET_LINENO(a); /* lineno for 'a' */
2810 FiLine* line = &fi->lines[lineno];
2811 UWord loff = (a - atag) / 8;
2812 UShort mask = 0x3 << (2 * (a & 7));
2813 /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
2814 if (LIKELY( fi->tags[lineno] == atag )) {
2815 /* hit. check line and update. */
2816 UShort u16 = line->u16s[loff];
 2817 Bool ok = (u16 & mask) == mask; /* 1 x R & W bits set? */
2818 line->u16s[loff] = u16 | mask; /* set them */
2819 return ok;
2820 } else {
2821 /* miss. nuke existing line and re-use it. */
2822 UWord i;
2823 fi->tags[lineno] = atag;
2824 for (i = 0; i < FI_LINE_SZB / 8; i++)
2825 line->u16s[i] = 0;
2826 line->u16s[loff] = mask;
2827 return False;
2828 }
2829 }
2830}
2831
sewardjf98e1c02008-10-25 16:22:41 +00002832
2833/////////////////////////////////////////////////////////
2834// //
2835// Threads //
2836// //
2837/////////////////////////////////////////////////////////
2838
sewardj23f12002009-07-24 08:45:08 +00002839// QQQ move this somewhere else
2840typedef struct { ULong ull; ExeContext* ec; } ULong_n_EC;
2841
sewardj8ab2c132009-08-02 09:34:35 +00002842/* How many of the above records to collect for each thread? Older
2843 ones are dumped when we run out of space. 62.5k requires 1MB per
2844 thread, since each ULong_n_EC record is 16 bytes long. When more
2845 than N_KWs_N_STACKs_PER_THREAD are present, the older half are
2846 deleted to make space. Hence in the worst case we will be able to
2847 produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
2848 Kw transitions (segments in this thread). For the current setting
2849 that gives a guaranteed stack for at least the last 31.25k
2850 segments. */
2851#define N_KWs_N_STACKs_PER_THREAD 62500
2852
2853
sewardjf98e1c02008-10-25 16:22:41 +00002854struct _Thr {
2855 /* Current VTSs for this thread. They change as we go along. viR
2856 is the VTS to be used for reads, viW for writes. Usually they
2857 are the same, but can differ when we deal with reader-writer
sewardj23f12002009-07-24 08:45:08 +00002858 locks. It is always the case that
2859 VtsID__cmpLEQ(viW,viR) == True
2860 that is, viW must be the same, or lagging behind, viR. */
sewardjf98e1c02008-10-25 16:22:41 +00002861 VtsID viR;
2862 VtsID viW;
sewardj23f12002009-07-24 08:45:08 +00002863
 2864 /* Is initially True, and is set to False after the thread really
 2865 has done a low-level exit. */
2866 Bool still_alive;
2867
2868 /* A filter that removes references for which we believe that
2869 msmcread/msmcwrite will not change the state, nor report a
2870 race. */
2871 Filter* filter;
2872
sewardjf98e1c02008-10-25 16:22:41 +00002873 /* opaque (to us) data we hold on behalf of the library's user. */
2874 void* opaque;
sewardj23f12002009-07-24 08:45:08 +00002875
sewardj8ab2c132009-08-02 09:34:35 +00002876 /* The ULongs (scalar Kws) in this array accumulate in strictly
sewardj23f12002009-07-24 08:45:08 +00002877 increasing order, without duplicates. This is important because
sewardj8ab2c132009-08-02 09:34:35 +00002878 we need to be able to find a given scalar Kw in this array
sewardj23f12002009-07-24 08:45:08 +00002879 later, by binary search. */
sewardj8ab2c132009-08-02 09:34:35 +00002880 XArray* /* ULong_n_EC */ local_Kws_n_stacks;
sewardjf98e1c02008-10-25 16:22:41 +00002881};
2882
2883static Thr* Thr__new ( void ) {
2884 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2885 thr->viR = VtsID_INVALID;
2886 thr->viW = VtsID_INVALID;
sewardj23f12002009-07-24 08:45:08 +00002887 thr->still_alive = True;
2888 thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
sewardj8ab2c132009-08-02 09:34:35 +00002889 thr->local_Kws_n_stacks
2890 = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.3 (local_Kws_and_stacks)",
sewardj23f12002009-07-24 08:45:08 +00002891 HG_(free), sizeof(ULong_n_EC) );
sewardjf98e1c02008-10-25 16:22:41 +00002892 return thr;
2893}
2894
sewardj8ab2c132009-08-02 09:34:35 +00002895static void note_local_Kw_n_stack_for ( Thr* thr )
sewardj23f12002009-07-24 08:45:08 +00002896{
2897 Word nPresent;
2898 ULong_n_EC pair;
2899 tl_assert(thr);
sewardjb7126172009-07-26 19:50:06 +00002900
2901 // We only collect this info at history level 1 (approx)
2902 if (HG_(clo_history_level) != 1)
2903 return;
2904
sewardj8ab2c132009-08-02 09:34:35 +00002905 /* This is the scalar Kw for thr. */
2906 pair.ull = VtsID__indexAt( thr->viW, thr );
sewardj23f12002009-07-24 08:45:08 +00002907 pair.ec = main_get_EC( thr );
2908 tl_assert(pair.ec);
sewardj8ab2c132009-08-02 09:34:35 +00002909 tl_assert(thr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00002910
2911 /* check that we're not adding duplicates */
sewardj8ab2c132009-08-02 09:34:35 +00002912 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
sewardj23f12002009-07-24 08:45:08 +00002913
2914 /* Throw away old stacks, if necessary. We can't accumulate stuff
2915 indefinitely. */
sewardj8ab2c132009-08-02 09:34:35 +00002916 if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
2917 VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
2918 nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
2919 if (0)
2920 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p (!!! gc !!!)\n",
sewardj23f12002009-07-24 08:45:08 +00002921 thr, pair.ull, pair.ec );
2922 }
2923
2924 if (nPresent > 0) {
2925 ULong_n_EC* prevPair
sewardj8ab2c132009-08-02 09:34:35 +00002926 = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
2927 tl_assert( prevPair->ull <= pair.ull );
sewardj23f12002009-07-24 08:45:08 +00002928 }
2929
2930 if (nPresent == 0)
2931 pair.ec = NULL;
2932
sewardj8ab2c132009-08-02 09:34:35 +00002933 VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
sewardj23f12002009-07-24 08:45:08 +00002934
2935 if (0)
sewardj8ab2c132009-08-02 09:34:35 +00002936 VG_(printf)("LOCAL Kw: thr %p, Kw %llu, ec %p\n",
sewardj23f12002009-07-24 08:45:08 +00002937 thr, pair.ull, pair.ec );
2938 if (0)
2939 VG_(pp_ExeContext)(pair.ec);
2940}
2941
2942static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
2943{
2944 if (pair1->ull < pair2->ull) return -1;
2945 if (pair1->ull > pair2->ull) return 1;
2946 return 0;
2947}
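
/* Illustrative sketch only: the kind of binary search which the
   strictly-increasing ordering of local_Kws_n_stacks makes possible
   -- find the index of the pair with the largest .ull <= kw, or -1
   if there is none. example_find_Kw is hypothetical. */
#if 0
static Word example_find_Kw ( Thr* thr, ULong kw )
{
   Word lo = 0, hi = VG_(sizeXA)( thr->local_Kws_n_stacks ) - 1;
   Word res = -1;
   while (lo <= hi) {
      Word mid = lo + (hi - lo) / 2;
      ULong_n_EC* p = VG_(indexXA)( thr->local_Kws_n_stacks, mid );
      if (p->ull <= kw) { res = mid; lo = mid + 1; }
      else              { hi = mid - 1; }
   }
   return res;
}
#endif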
2948
sewardjf98e1c02008-10-25 16:22:41 +00002949
2950/////////////////////////////////////////////////////////
2951// //
2952// Shadow Values //
2953// //
2954/////////////////////////////////////////////////////////
2955
2956// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2957// hb_zsm.h. We have to do everything else here.
2958
2959/* SVal is 64 bit unsigned int.
2960
2961 <---------30---------> <---------30--------->
2962 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
sewardjf98e1c02008-10-25 16:22:41 +00002963 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
sewardj23f12002009-07-24 08:45:08 +00002964 11 0--------------------0 00 0--------------------0 A: SVal_INVALID
2965
sewardjf98e1c02008-10-25 16:22:41 +00002966*/
2967#define SVAL_TAGMASK (3ULL << 62)
2968
2969static inline Bool SVal__isC ( SVal s ) {
2970 return (0ULL << 62) == (s & SVAL_TAGMASK);
2971}
2972static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2973 //tl_assert(VtsID__is_valid(rmini));
2974 //tl_assert(VtsID__is_valid(wmini));
2975 return (((ULong)rmini) << 32) | ((ULong)wmini);
2976}
2977static inline VtsID SVal__unC_Rmin ( SVal s ) {
2978 tl_assert(SVal__isC(s));
2979 return (VtsID)(s >> 32);
2980}
2981static inline VtsID SVal__unC_Wmin ( SVal s ) {
2982 tl_assert(SVal__isC(s));
2983 return (VtsID)(s & 0xFFFFFFFFULL);
2984}
2985
sewardj23f12002009-07-24 08:45:08 +00002986static inline Bool SVal__isA ( SVal s ) {
sewardjf98e1c02008-10-25 16:22:41 +00002987 return (2ULL << 62) == (s & SVAL_TAGMASK);
2988}
sewardj23f12002009-07-24 08:45:08 +00002989static inline SVal SVal__mkA ( void ) {
sewardjf98e1c02008-10-25 16:22:41 +00002990 return 2ULL << 62;
2991}
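
/* Illustrative sketch only: a round-trip check of the C-form packing
   shown in the diagram above. example_SVal_packing is hypothetical. */
#if 0
static void example_SVal_packing ( void )
{
   SVal s = SVal__mkC( (VtsID)3, (VtsID)5 );  /* tag bits 00 ==> C form */
   tl_assert( SVal__isC(s) );
   tl_assert( !SVal__isA(s) );
   tl_assert( SVal__unC_Rmin(s) == 3 );
   tl_assert( SVal__unC_Wmin(s) == 5 );
   tl_assert( SVal__isA( SVal__mkA() ) );
}
#endif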
2992
2993/* Direct callback from lib_zsm. */
2994static void SVal__rcinc ( SVal s ) {
2995 if (SVal__isC(s)) {
2996 VtsID__rcinc( SVal__unC_Rmin(s) );
2997 VtsID__rcinc( SVal__unC_Wmin(s) );
2998 }
2999}
3000
3001/* Direct callback from lib_zsm. */
3002static void SVal__rcdec ( SVal s ) {
3003 if (SVal__isC(s)) {
3004 VtsID__rcdec( SVal__unC_Rmin(s) );
3005 VtsID__rcdec( SVal__unC_Wmin(s) );
3006 }
3007}
3008
3009
3010/////////////////////////////////////////////////////////
3011// //
sewardjd86e3a22008-12-03 11:39:37 +00003012// A simple group (memory) allocator //
3013// //
3014/////////////////////////////////////////////////////////
3015
3016//////////////// BEGIN general group allocator
3017typedef
3018 struct {
3019 UWord elemSzB; /* element size */
3020 UWord nPerGroup; /* # elems per group */
3021 void* (*alloc)(HChar*, SizeT); /* group allocator */
3022 HChar* cc; /* group allocator's cc */
3023 void (*free)(void*); /* group allocator's free-er (unused) */
3024 /* XArray of void* (pointers to groups). The groups themselves.
3025 Each element is a pointer to a block of size (elemSzB *
3026 nPerGroup) bytes. */
3027 XArray* groups;
3028 /* next free element. Is a pointer to an element in one of the
3029 groups pointed to by .groups. */
3030 void* nextFree;
3031 }
3032 GroupAlloc;
3033
3034static void init_GroupAlloc ( /*MOD*/GroupAlloc* ga,
3035 UWord elemSzB,
3036 UWord nPerGroup,
3037 void* (*alloc)(HChar*, SizeT),
3038 HChar* cc,
3039 void (*free)(void*) )
3040{
3041 tl_assert(0 == (elemSzB % sizeof(UWord)));
3042 tl_assert(elemSzB >= sizeof(UWord));
3043 tl_assert(nPerGroup >= 100); /* let's say */
3044 tl_assert(alloc);
3045 tl_assert(cc);
3046 tl_assert(free);
3047 tl_assert(ga);
3048 VG_(memset)(ga, 0, sizeof(*ga));
3049 ga->elemSzB = elemSzB;
3050 ga->nPerGroup = nPerGroup;
3051 ga->groups = NULL;
3052 ga->alloc = alloc;
3053 ga->cc = cc;
3054 ga->free = free;
3055 ga->groups = VG_(newXA)( alloc, cc, free, sizeof(void*) );
3056 ga->nextFree = NULL;
3057 tl_assert(ga->groups);
3058}
3059
3060/* The freelist is empty. Allocate a new group and put all the new
3061 elements in it onto the freelist. */
3062__attribute__((noinline))
3063static void gal_add_new_group ( GroupAlloc* ga )
3064{
3065 Word i;
3066 UWord* group;
3067 tl_assert(ga);
3068 tl_assert(ga->nextFree == NULL);
3069 group = ga->alloc( ga->cc, ga->elemSzB * ga->nPerGroup );
3070 tl_assert(group);
3071 /* extend the freelist through the new group. Place the freelist
3072 pointer in the first word of each element. That's why the
3073 element size must be at least one word. */
3074 for (i = ga->nPerGroup-1; i >= 0; i--) {
3075 UChar* elemC = ((UChar*)group) + i * ga->elemSzB;
3076 UWord* elem = (UWord*)elemC;
3077 tl_assert(0 == (((UWord)elem) % sizeof(UWord)));
3078 *elem = (UWord)ga->nextFree;
3079 ga->nextFree = elem;
3080 }
3081 /* and add to our collection of groups */
3082 VG_(addToXA)( ga->groups, &group );
3083}
3084
3085inline static void* gal_Alloc ( GroupAlloc* ga )
3086{
3087 UWord* elem;
3088 if (UNLIKELY(ga->nextFree == NULL)) {
3089 gal_add_new_group(ga);
3090 }
3091 elem = ga->nextFree;
3092 ga->nextFree = (void*)*elem;
3093 *elem = 0; /* unnecessary, but just to be on the safe side */
3094 return elem;
3095}
3096
3097inline static void* gal_Alloc_w_size_check ( GroupAlloc* ga, SizeT n )
3098{
3099 tl_assert(n == ga->elemSzB);
3100 return gal_Alloc( ga );
3101}
3102
3103inline static void gal_Free ( GroupAlloc* ga, void* p )
3104{
3105 UWord* elem = (UWord*)p;
3106 *elem = (UWord)ga->nextFree;
3107 ga->nextFree = elem;
3108}
3109//////////////// END general group allocator
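
/* Illustrative sketch only: the intended usage pattern of the group
   allocator, for a hypothetical two-word element type. All example_*
   names are made up. */
#if 0
typedef struct { UWord w1; UWord w2; } ExampleElem;
static GroupAlloc example_ga;
static void example_group_alloc_usage ( void )
{
   ExampleElem* e;
   init_GroupAlloc( &example_ga, sizeof(ExampleElem), 1000,
                    HG_(zalloc), "libhb.example.1", HG_(free) );
   e = gal_Alloc( &example_ga );  /* allocates a new group on 1st call */
   e->w1 = e->w2 = 0;
   gal_Free( &example_ga, e );    /* back onto the freelist */
}
#endif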
3110
3111
3112/////////////////////////////////////////////////////////
3113// //
sewardjf98e1c02008-10-25 16:22:41 +00003114// Change-event map2 //
3115// //
3116/////////////////////////////////////////////////////////
3117
sewardjf98e1c02008-10-25 16:22:41 +00003118#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
3119
3120/* This is in two parts:
3121
sewardj23f12002009-07-24 08:45:08 +00003122 1. A hash table of RCECs. This is a set of reference-counted stack
sewardjf98e1c02008-10-25 16:22:41 +00003123 traces. When the reference count of a stack trace becomes zero,
3124 it is removed from the set and freed up. The intent is to have
3125 a set of stack traces which can be referred to from (2), but to
3126 only represent each one once. The set is indexed/searched by
3127 ordering on the stack trace vectors.
3128
sewardj849b0ed2008-12-21 10:43:10 +00003129 2. A SparseWA of OldRefs. These store information about each old
3130 ref that we need to record. It is indexed by address of the
sewardjf98e1c02008-10-25 16:22:41 +00003131 location for which the information is recorded. For LRU
3132 purposes, each OldRef also contains a generation number,
3133 indicating when it was most recently accessed.
3134
3135 The important part of an OldRef is, however, its accs[] array.
sewardj849b0ed2008-12-21 10:43:10 +00003136 This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3137 size) triples to RCECs. This allows us to collect the last
3138 access-traceback by up to N_OLDREF_ACCS different triples for
 3139 this location. The accs[] array is an MTF (move-to-front) array. If a binding
3140 falls off the end, that's too bad -- we will lose info about
3141 that triple's access to this location.
sewardjf98e1c02008-10-25 16:22:41 +00003142
sewardj849b0ed2008-12-21 10:43:10 +00003143 When the SparseWA becomes too big, we can throw away the OldRefs
sewardjf98e1c02008-10-25 16:22:41 +00003144 whose generation numbers are below some threshold; hence doing
3145 approximate LRU discarding. For each discarded OldRef we must
3146 of course decrement the reference count on the all RCECs it
3147 refers to, in order that entries from (1) eventually get
3148 discarded too.
sewardj849b0ed2008-12-21 10:43:10 +00003149
3150 A major improvement in reliability of this mechanism would be to
3151 have a dynamically sized OldRef.accs[] array, so no entries ever
3152 fall off the end. In investigations (Dec 08) it appears that a
3153 major cause for the non-availability of conflicting-access traces
3154 in race reports is caused by the fixed size of this array. I
3155 suspect for most OldRefs, only a few entries are used, but for a
3156 minority of cases there is an overflow, leading to info lossage.
3157 Investigations also suggest this is very workload and scheduling
3158 sensitive. Therefore a dynamic sizing would be better.
3159
3160 However, dynamic sizing would defeat the use of a GroupAllocator
3161 for OldRef structures. And that's important for performance. So
3162 it's not straightforward to do.
sewardjf98e1c02008-10-25 16:22:41 +00003163*/
3164
3165
3166static UWord stats__ctxt_rcdec1 = 0;
3167static UWord stats__ctxt_rcdec2 = 0;
3168static UWord stats__ctxt_rcdec3 = 0;
3169static UWord stats__ctxt_rcdec_calls = 0;
3170static UWord stats__ctxt_rcdec_discards = 0;
3171static UWord stats__ctxt_rcdec1_eq = 0;
3172
3173static UWord stats__ctxt_tab_curr = 0;
3174static UWord stats__ctxt_tab_max = 0;
3175
3176static UWord stats__ctxt_tab_qs = 0;
3177static UWord stats__ctxt_tab_cmps = 0;
3178
3179
3180///////////////////////////////////////////////////////
3181//// Part (1): An OSet of RCECs
3182///
3183
3184#define N_FRAMES 8
3185
3186// (UInt) `echo "Reference Counted Execution Context" | md5sum`
3187#define RCEC_MAGIC 0xab88abb2UL
3188
3189//#define N_RCEC_TAB 98317 /* prime */
3190#define N_RCEC_TAB 196613 /* prime */
3191
3192typedef
3193 struct _RCEC {
sewardjd86e3a22008-12-03 11:39:37 +00003194 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003195 struct _RCEC* next;
sewardjf98e1c02008-10-25 16:22:41 +00003196 UWord rc;
3197 UWord rcX; /* used for crosschecking */
njn6c83d5e2009-05-05 23:46:24 +00003198 UWord frames_hash; /* hash of all the frames */
3199 UWord frames[N_FRAMES];
sewardjf98e1c02008-10-25 16:22:41 +00003200 }
3201 RCEC;
3202
3203static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3204
3205
3206/* Gives an arbitrary total order on RCEC .frames fields */
3207static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3208 Word i;
3209 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3210 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
njn6c83d5e2009-05-05 23:46:24 +00003211 if (ec1->frames_hash < ec2->frames_hash) return -1;
3212 if (ec1->frames_hash > ec2->frames_hash) return 1;
3213 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003214 if (ec1->frames[i] < ec2->frames[i]) return -1;
njn6c83d5e2009-05-05 23:46:24 +00003215 if (ec1->frames[i] > ec2->frames[i]) return 1;
sewardjf98e1c02008-10-25 16:22:41 +00003216 }
3217 return 0;
3218}
3219
3220
3221/* Dec the ref of this RCEC. */
3222static void ctxt__rcdec ( RCEC* ec )
3223{
3224 stats__ctxt_rcdec_calls++;
3225 tl_assert(ec && ec->magic == RCEC_MAGIC);
3226 tl_assert(ec->rc > 0);
3227 ec->rc--;
3228}
3229
3230static void ctxt__rcinc ( RCEC* ec )
3231{
3232 tl_assert(ec && ec->magic == RCEC_MAGIC);
3233 ec->rc++;
3234}
3235
3236
sewardjd86e3a22008-12-03 11:39:37 +00003237//////////// BEGIN RCEC group allocator
3238static GroupAlloc rcec_group_allocator;
3239
3240static RCEC* alloc_RCEC ( void ) {
3241 return gal_Alloc ( &rcec_group_allocator );
3242}
3243
3244static void free_RCEC ( RCEC* rcec ) {
3245 tl_assert(rcec->magic == RCEC_MAGIC);
3246 gal_Free( &rcec_group_allocator, rcec );
3247}
3248//////////// END OldRef group allocator
3249
3250
sewardjf98e1c02008-10-25 16:22:41 +00003251/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
 3252 move it one step closer to the front of the list, so as to make
3253 subsequent searches for it cheaper. */
3254static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3255{
3256 RCEC *ec0, *ec1, *ec2;
3257 if (ec == *headp)
3258 tl_assert(0); /* already at head of list */
3259 tl_assert(ec != NULL);
3260 ec0 = *headp;
3261 ec1 = NULL;
3262 ec2 = NULL;
3263 while (True) {
3264 if (ec0 == NULL || ec0 == ec) break;
3265 ec2 = ec1;
3266 ec1 = ec0;
3267 ec0 = ec0->next;
3268 }
3269 tl_assert(ec0 == ec);
3270 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3271 RCEC* tmp;
3272 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3273 predecessor. Swap ec0 and ec1, that is, move ec0 one step
3274 closer to the start of the list. */
3275 tl_assert(ec2->next == ec1);
3276 tl_assert(ec1->next == ec0);
3277 tmp = ec0->next;
3278 ec2->next = ec0;
3279 ec0->next = ec1;
3280 ec1->next = tmp;
3281 }
3282 else
3283 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3284 /* it's second in the list. */
3285 tl_assert(*headp == ec1);
3286 tl_assert(ec1->next == ec0);
3287 ec1->next = ec0->next;
3288 ec0->next = ec1;
3289 *headp = ec0;
3290 }
3291}
3292
3293
3294/* Find the given RCEC in the tree, and return a pointer to it. Or,
3295 if not present, add the given one to the tree (by making a copy of
3296 it, so the caller can immediately deallocate the original) and
3297 return a pointer to the copy. The caller can safely have 'example'
3298 on its stack, since we will always return a pointer to a copy of
3299 it, not to the original. Note that the inserted node will have .rc
 3300 of zero and so the caller must immediately increment it. */
3301__attribute__((noinline))
3302static RCEC* ctxt__find_or_add ( RCEC* example )
3303{
3304 UWord hent;
3305 RCEC* copy;
3306 tl_assert(example && example->magic == RCEC_MAGIC);
3307 tl_assert(example->rc == 0);
3308
3309 /* Search the hash table to see if we already have it. */
3310 stats__ctxt_tab_qs++;
njn6c83d5e2009-05-05 23:46:24 +00003311 hent = example->frames_hash % N_RCEC_TAB;
sewardjf98e1c02008-10-25 16:22:41 +00003312 copy = contextTab[hent];
3313 while (1) {
3314 if (!copy) break;
3315 tl_assert(copy->magic == RCEC_MAGIC);
3316 stats__ctxt_tab_cmps++;
3317 if (0 == RCEC__cmp_by_frames(copy, example)) break;
3318 copy = copy->next;
3319 }
3320
3321 if (copy) {
3322 tl_assert(copy != example);
3323 /* optimisation: if it's not at the head of its list, move 1
3324 step fwds, to make future searches cheaper */
3325 if (copy != contextTab[hent]) {
3326 move_RCEC_one_step_forward( &contextTab[hent], copy );
3327 }
3328 } else {
sewardjd86e3a22008-12-03 11:39:37 +00003329 copy = alloc_RCEC();
sewardjf98e1c02008-10-25 16:22:41 +00003330 tl_assert(copy != example);
3331 *copy = *example;
3332 copy->next = contextTab[hent];
3333 contextTab[hent] = copy;
3334 stats__ctxt_tab_curr++;
3335 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
3336 stats__ctxt_tab_max = stats__ctxt_tab_curr;
3337 }
3338 return copy;
3339}
3340
3341static inline UWord ROLW ( UWord w, Int n )
3342{
3343 Int bpw = 8 * sizeof(UWord);
3344 w = (w << n) | (w >> (bpw-n));
3345 return w;
3346}
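/* Note (added): the rotate above is only defined for 0 < n < bpw,
   since C leaves w >> bpw undefined.  The single caller below always
   passes n == 19, which is safe for both 32- and 64-bit UWords. */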
3347
3348__attribute__((noinline))
3349static RCEC* get_RCEC ( Thr* thr )
3350{
3351 UWord hash, i;
3352 RCEC example;
3353 example.magic = RCEC_MAGIC;
3354 example.rc = 0;
3355 example.rcX = 0;
njn6c83d5e2009-05-05 23:46:24 +00003356 main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
sewardjf98e1c02008-10-25 16:22:41 +00003357 hash = 0;
njn6c83d5e2009-05-05 23:46:24 +00003358 for (i = 0; i < N_FRAMES; i++) {
sewardjf98e1c02008-10-25 16:22:41 +00003359 hash ^= example.frames[i];
3360 hash = ROLW(hash, 19);
3361 }
njn6c83d5e2009-05-05 23:46:24 +00003362 example.frames_hash = hash;
sewardjf98e1c02008-10-25 16:22:41 +00003363 return ctxt__find_or_add( &example );
3364}
3365
3366///////////////////////////////////////////////////////
sewardjbc307e52008-12-06 22:10:54 +00003367//// Part (2):
3368/// A SparseWA guest-addr -> OldRef, that refers to (1)
sewardjf98e1c02008-10-25 16:22:41 +00003369///
3370
3371// (UInt) `echo "Old Reference Information" | md5sum`
3372#define OldRef_MAGIC 0x30b1f075UL
3373
sewardjc5ea9962008-12-07 01:41:46 +00003374/* Records an access: a thread and a context. The size
3375 (1,2,4,8) and read-or-writeness are also encoded as
3376   follows: bottom bit of .thr is 1 if write, 0 if read;
3377       bottom 2 bits of .rcec encode the size:
3378 00 = 1, 01 = 2, 10 = 4, 11 = 8
3379*/
sewardjf98e1c02008-10-25 16:22:41 +00003380typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
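/* Added sketch (not part of the original source; encode_acc and
   decode_acc are hypothetical names): a round-trip for the tagging
   scheme described above.  It relies on Thr and RCEC allocations
   being at least 4-byte aligned, so the bottom two bits of a valid
   pointer are zero and free for reuse. */
#if 0
static Thr_n_RCEC encode_acc ( Thr* thr, RCEC* rcec, SizeT szB, Bool isW )
{
   Thr_n_RCEC acc;
   UWord szLg2 = szB == 1 ? 0 : szB == 2 ? 1 : szB == 4 ? 2 : 3;
   acc.thr  = (Thr*) ( ((UWord)thr)  | (isW ? 1 : 0) );
   acc.rcec = (RCEC*)( ((UWord)rcec) | szLg2 );
   return acc;
}

static void decode_acc ( Thr_n_RCEC acc, /*OUT*/Thr** thr,
                         /*OUT*/RCEC** rcec,
                         /*OUT*/SizeT* szB, /*OUT*/Bool* isW )
{
   *thr  = (Thr*) ( ((UWord)acc.thr)  & ~(UWord)3 );
   *rcec = (RCEC*)( ((UWord)acc.rcec) & ~(UWord)3 );
   *isW  = 1 == ( ((UWord)acc.thr) & 1 );
   *szB  = (SizeT)1 << ( ((UWord)acc.rcec) & 3 );
}
#endif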
3381
sewardj849b0ed2008-12-21 10:43:10 +00003382#define N_OLDREF_ACCS 5
sewardjf98e1c02008-10-25 16:22:41 +00003383
3384typedef
3385 struct {
sewardjd86e3a22008-12-03 11:39:37 +00003386 UWord magic; /* sanity check only */
sewardjf98e1c02008-10-25 16:22:41 +00003387 UWord gen; /* when most recently accessed */
sewardjd86e3a22008-12-03 11:39:37 +00003388 /* or free list when not in use */
sewardjf98e1c02008-10-25 16:22:41 +00003389 /* unused slots in this array have .thr == NULL */
3390 Thr_n_RCEC accs[N_OLDREF_ACCS];
3391 }
3392 OldRef;
3393
sewardjd86e3a22008-12-03 11:39:37 +00003394
3395//////////// BEGIN OldRef group allocator
3396static GroupAlloc oldref_group_allocator;
3397
3398static OldRef* alloc_OldRef ( void ) {
3399 return gal_Alloc ( &oldref_group_allocator );
3400}
3401
3402static void free_OldRef ( OldRef* r ) {
3403 tl_assert(r->magic == OldRef_MAGIC);
3404 gal_Free( &oldref_group_allocator, r );
3405}
3406//////////// END OldRef group allocator
3407
sewardjd86e3a22008-12-03 11:39:37 +00003408
sewardjbc307e52008-12-06 22:10:54 +00003409static SparseWA* oldrefTree = NULL; /* SparseWA* OldRef* */
3410static UWord oldrefGen = 0; /* current LRU generation # */
3411static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
3412static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
sewardjf98e1c02008-10-25 16:22:41 +00003413
sewardjc5ea9962008-12-07 01:41:46 +00003414inline static void* ptr_or_UWord ( void* p, UWord w ) {
3415 return (void*)( ((UWord)p) | ((UWord)w) );
3416}
3417inline static void* ptr_and_UWord ( void* p, UWord w ) {
3418 return (void*)( ((UWord)p) & ((UWord)w) );
3419}
3420
sewardj1669cc72008-12-13 01:20:21 +00003421inline static UInt min_UInt ( UInt a, UInt b ) {
3422 return a < b ? a : b;
3423}
3424
sewardja781be62008-12-08 00:12:28 +00003425/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
3426 first interval is lower, 1 if the first interval is higher, and 0
3427   if there is any overlap.  The redundant casts are paranoia,
3428   added after what looked distinctly like a bug in gcc-4.1.2, in
3429   which some of the comparisons were done signedly instead of
3430   unsignedly. */
3431/* Copied from exp-ptrcheck/sg_main.c */
3432static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
3433 Addr a2, SizeT n2 ) {
3434 UWord a1w = (UWord)a1;
3435 UWord n1w = (UWord)n1;
3436 UWord a2w = (UWord)a2;
3437 UWord n2w = (UWord)n2;
3438 tl_assert(n1w > 0 && n2w > 0);
3439 if (a1w + n1w <= a2w) return -1L;
3440 if (a2w + n2w <= a1w) return 1L;
3441 return 0;
3442}
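/* Added self-test sketch (not called anywhere; the addresses are
   made up): a few concrete cases.  Only a result of 0 marks two
   accesses as overlapping, and hence as a possible conflict in the
   scan further below. */
#if 0
static void cmp_nonempty_intervals__selftest ( void )
{
   tl_assert(cmp_nonempty_intervals(0x1000, 4, 0x1004, 4) == -1); /* below */
   tl_assert(cmp_nonempty_intervals(0x1002, 4, 0x1000, 4) ==  0); /* overlap */
   tl_assert(cmp_nonempty_intervals(0x1008, 2, 0x1000, 4) ==  1); /* above */
}
#endif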
3443
sewardjc5ea9962008-12-07 01:41:46 +00003444static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
sewardjf98e1c02008-10-25 16:22:41 +00003445{
sewardjd86e3a22008-12-03 11:39:37 +00003446 OldRef* ref;
sewardjc5ea9962008-12-07 01:41:46 +00003447 RCEC* rcec;
sewardjd86e3a22008-12-03 11:39:37 +00003448 Word i, j;
3449 UWord keyW, valW;
3450 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003451
sewardjc5ea9962008-12-07 01:41:46 +00003452 rcec = get_RCEC( thr );
3453 ctxt__rcinc(rcec);
3454
3455   /* encode the writeness of the transaction in the bottom bit of
3456      thr, and its size in the bottom two bits of rcec. */
3457 thr = ptr_or_UWord(thr, isW ? 1 : 0);
3458 switch (szB) {
3459 /* This doesn't look particularly branch-predictor friendly. */
3460 case 1: rcec = ptr_or_UWord(rcec, 0); break;
3461 case 2: rcec = ptr_or_UWord(rcec, 1); break;
3462 case 4: rcec = ptr_or_UWord(rcec, 2); break;
3463 case 8: rcec = ptr_or_UWord(rcec, 3); break;
3464 default: tl_assert(0);
3465 }
3466
3467 /* Look in the map to see if we already have this. */
sewardjbc307e52008-12-06 22:10:54 +00003468 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
sewardjf98e1c02008-10-25 16:22:41 +00003469
sewardjd86e3a22008-12-03 11:39:37 +00003470 if (b) {
sewardjf98e1c02008-10-25 16:22:41 +00003471
3472 /* We already have a record for this address. We now need to
sewardj849b0ed2008-12-21 10:43:10 +00003473 see if we have a stack trace pertaining to this (thread, R/W,
3474 size) triple. */
sewardjd86e3a22008-12-03 11:39:37 +00003475 tl_assert(keyW == a);
3476 ref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003477 tl_assert(ref->magic == OldRef_MAGIC);
3478
3479 tl_assert(thr);
3480 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardj849b0ed2008-12-21 10:43:10 +00003481 if (ref->accs[i].thr != thr)
3482 continue;
3483 /* since .thr encodes both the accessing thread and the
3484 read/writeness, we know now that at least those features
3485 of the access match this entry. So we just need to check
3486 the size indication. Do this by inspecting the lowest 2 bits of
3487 .rcec, which contain the encoded size info. */
3488 if (ptr_and_UWord(ref->accs[i].rcec,3) != ptr_and_UWord(rcec,3))
3489 continue;
3490 /* else we have a match, so stop looking. */
3491 break;
sewardjf98e1c02008-10-25 16:22:41 +00003492 }
3493
3494 if (i < N_OLDREF_ACCS) {
3495 /* thread 'thr' has an entry at index 'i'. Update it. */
3496 if (i > 0) {
3497 Thr_n_RCEC tmp = ref->accs[i-1];
3498 ref->accs[i-1] = ref->accs[i];
3499 ref->accs[i] = tmp;
3500 i--;
3501 }
sewardjc5ea9962008-12-07 01:41:46 +00003502 if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
sewardjf98e1c02008-10-25 16:22:41 +00003503 stats__ctxt_rcdec1++;
sewardjc5ea9962008-12-07 01:41:46 +00003504 ctxt__rcdec( ptr_and_UWord(ref->accs[i].rcec, ~3) );
3505 ref->accs[i].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003506 tl_assert(ref->accs[i].thr == thr);
3507 } else {
sewardj849b0ed2008-12-21 10:43:10 +00003508 /* No entry for this (thread, R/W, size) triple. Shuffle all
3509 of them down one slot, and put the new entry at the start
3510 of the array. */
sewardjf98e1c02008-10-25 16:22:41 +00003511 if (ref->accs[N_OLDREF_ACCS-1].thr) {
3512 /* the last slot is in use. We must dec the rc on the
3513 associated rcec. */
3514 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
3515 stats__ctxt_rcdec2++;
sewardj849b0ed2008-12-21 10:43:10 +00003516 if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
3517 VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
sewardjc5ea9962008-12-07 01:41:46 +00003518 ctxt__rcdec( ptr_and_UWord(ref->accs[N_OLDREF_ACCS-1].rcec, ~3) );
sewardjf98e1c02008-10-25 16:22:41 +00003519 } else {
3520 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
3521 }
3522 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
3523 ref->accs[j] = ref->accs[j-1];
3524 ref->accs[0].thr = thr;
sewardjc5ea9962008-12-07 01:41:46 +00003525 ref->accs[0].rcec = rcec;
3526 /* thr==NULL is used to signify an empty slot, so we can't
3527 add a NULL thr. */
3528 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003529 }
3530
3531 ref->gen = oldrefGen;
sewardjf98e1c02008-10-25 16:22:41 +00003532
3533 } else {
3534
3535 /* We don't have a record for this address. Create a new one. */
3536 if (oldrefTreeN >= oldrefGenIncAt) {
3537 oldrefGen++;
3538 oldrefGenIncAt = oldrefTreeN + 50000;
3539 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
3540 oldrefGen, oldrefTreeN );
3541 }
sewardjd86e3a22008-12-03 11:39:37 +00003542
3543 ref = alloc_OldRef();
sewardjf98e1c02008-10-25 16:22:41 +00003544 ref->magic = OldRef_MAGIC;
3545 ref->gen = oldrefGen;
sewardjc5ea9962008-12-07 01:41:46 +00003546 ref->accs[0].rcec = rcec;
sewardjf98e1c02008-10-25 16:22:41 +00003547 ref->accs[0].thr = thr;
sewardj849b0ed2008-12-21 10:43:10 +00003548 /* thr==NULL is used to signify an empty slot, so we can't add a
3549 NULL thr. */
3550 tl_assert(ptr_and_UWord(thr, ~3) != 0);
sewardjf98e1c02008-10-25 16:22:41 +00003551 for (j = 1; j < N_OLDREF_ACCS; j++) {
3552 ref->accs[j].thr = NULL;
3553 ref->accs[j].rcec = NULL;
3554 }
sewardjbc307e52008-12-06 22:10:54 +00003555 VG_(addToSWA)( oldrefTree, a, (UWord)ref );
sewardjf98e1c02008-10-25 16:22:41 +00003556 oldrefTreeN++;
3557
3558 }
3559}
3560
3561
sewardjc5ea9962008-12-07 01:41:46 +00003562Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
3563 /*OUT*/Thr** resThr,
3564 /*OUT*/SizeT* resSzB,
3565 /*OUT*/Bool* resIsW,
3566 Thr* thr, Addr a, SizeT szB, Bool isW )
sewardjf98e1c02008-10-25 16:22:41 +00003567{
sewardja781be62008-12-08 00:12:28 +00003568 Word i, j;
sewardjd86e3a22008-12-03 11:39:37 +00003569 OldRef* ref;
3570 UWord keyW, valW;
3571 Bool b;
sewardjf98e1c02008-10-25 16:22:41 +00003572
sewardjc5ea9962008-12-07 01:41:46 +00003573 Thr* cand_thr;
3574 RCEC* cand_rcec;
3575 Bool cand_isW;
3576 SizeT cand_szB;
sewardja781be62008-12-08 00:12:28 +00003577 Addr cand_a;
3578
3579 Addr toCheck[15];
3580 Int nToCheck = 0;
sewardjc5ea9962008-12-07 01:41:46 +00003581
3582 tl_assert(thr);
3583 tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
sewardjf98e1c02008-10-25 16:22:41 +00003584
sewardja781be62008-12-08 00:12:28 +00003585 toCheck[nToCheck++] = a;
3586 for (i = -7; i < (Word)szB; i++) {
3587 if (i != 0)
3588 toCheck[nToCheck++] = a + i;
3589 }
3590 tl_assert(nToCheck <= 15);
3591
3592 /* Now see if we can find a suitable matching event for
3593 any of the addresses in toCheck[0 .. nToCheck-1]. */
3594 for (j = 0; j < nToCheck; j++) {
3595
3596 cand_a = toCheck[j];
3597 // VG_(printf)("test %ld %p\n", j, cand_a);
3598
3599 b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
3600 if (!b)
3601 continue;
3602
sewardjd86e3a22008-12-03 11:39:37 +00003603 ref = (OldRef*)valW;
sewardja781be62008-12-08 00:12:28 +00003604 tl_assert(keyW == cand_a);
sewardjf98e1c02008-10-25 16:22:41 +00003605 tl_assert(ref->magic == OldRef_MAGIC);
3606 tl_assert(ref->accs[0].thr); /* first slot must always be used */
3607
sewardjc5ea9962008-12-07 01:41:46 +00003608 cand_thr = NULL;
3609 cand_rcec = NULL;
3610 cand_isW = False;
3611 cand_szB = 0;
sewardjf98e1c02008-10-25 16:22:41 +00003612
sewardjc5ea9962008-12-07 01:41:46 +00003613 for (i = 0; i < N_OLDREF_ACCS; i++) {
3614 Thr_n_RCEC* cand = &ref->accs[i];
3615 cand_thr = ptr_and_UWord(cand->thr, ~3);
3616 cand_rcec = ptr_and_UWord(cand->rcec, ~3);
3617 /* Decode the writeness from the bottom bit of .thr. */
3618 cand_isW = 1 == (UWord)ptr_and_UWord(cand->thr, 1);
3619 /* Decode the size from the bottom two bits of .rcec. */
3620 switch ((UWord)ptr_and_UWord(cand->rcec, 3)) {
3621 case 0: cand_szB = 1; break;
3622 case 1: cand_szB = 2; break;
3623 case 2: cand_szB = 4; break;
3624 case 3: cand_szB = 8; break;
3625 default: tl_assert(0);
3626 }
3627
3628 if (cand_thr == NULL)
3629 /* This slot isn't in use. Ignore it. */
3630 continue;
3631
3632 if (cand_thr == thr)
3633 /* This is an access by the same thread, but we're only
3634 interested in accesses from other threads. Ignore. */
3635 continue;
3636
3637 if ((!cand_isW) && (!isW))
3638 /* We don't want to report a read racing against another
3639 read; that's stupid. So in this case move on. */
3640 continue;
3641
sewardja781be62008-12-08 00:12:28 +00003642 if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
3643 /* No overlap with the access we're asking about. Ignore. */
3644 continue;
3645
sewardjc5ea9962008-12-07 01:41:46 +00003646 /* We have a match. Stop searching. */
3647 break;
3648 }
3649
3650 tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
3651
sewardja781be62008-12-08 00:12:28 +00003652 if (i < N_OLDREF_ACCS) {
njn3a4b58f2009-05-07 23:08:10 +00003653 Int n, maxNFrames;
sewardja781be62008-12-08 00:12:28 +00003654 /* return with success */
3655 tl_assert(cand_thr);
3656 tl_assert(cand_rcec);
3657 tl_assert(cand_rcec->magic == RCEC_MAGIC);
3658 tl_assert(cand_szB >= 1);
njn3a4b58f2009-05-07 23:08:10 +00003659 /* Count how many non-zero frames we have. */
3660 maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
3661 for (n = 0; n < maxNFrames; n++) {
3662 if (0 == cand_rcec->frames[n]) break;
3663 }
3664 *resEC = VG_(make_ExeContext_from_StackTrace)(cand_rcec->frames, n);
sewardja781be62008-12-08 00:12:28 +00003665 *resThr = cand_thr;
3666 *resSzB = cand_szB;
3667 *resIsW = cand_isW;
3668 return True;
3669 }
sewardjc5ea9962008-12-07 01:41:46 +00003670
sewardja781be62008-12-08 00:12:28 +00003671 /* consider next address in toCheck[] */
3672 } /* for (j = 0; j < nToCheck; j++) */
sewardjf98e1c02008-10-25 16:22:41 +00003673
sewardja781be62008-12-08 00:12:28 +00003674 /* really didn't find anything. */
3675 return False;
sewardjf98e1c02008-10-25 16:22:41 +00003676}
3677
3678static void event_map_init ( void )
3679{
3680 Word i;
sewardjd86e3a22008-12-03 11:39:37 +00003681
3682 /* Context (RCEC) group allocator */
3683 init_GroupAlloc ( &rcec_group_allocator,
3684 sizeof(RCEC),
3685 1000 /* RCECs per group */,
3686 HG_(zalloc),
3687 "libhb.event_map_init.1 (RCEC groups)",
3688 HG_(free) );
3689
3690 /* Context table */
sewardjf98e1c02008-10-25 16:22:41 +00003691 tl_assert(!contextTab);
sewardjd86e3a22008-12-03 11:39:37 +00003692 contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
sewardjf98e1c02008-10-25 16:22:41 +00003693 N_RCEC_TAB * sizeof(RCEC*) );
3694 tl_assert(contextTab);
3695 for (i = 0; i < N_RCEC_TAB; i++)
3696 contextTab[i] = NULL;
3697
sewardjd86e3a22008-12-03 11:39:37 +00003698 /* Oldref group allocator */
3699 init_GroupAlloc ( &oldref_group_allocator,
3700 sizeof(OldRef),
3701 1000 /* OldRefs per group */,
3702 HG_(zalloc),
3703 "libhb.event_map_init.3 (OldRef groups)",
3704 HG_(free) );
3705
sewardjd86e3a22008-12-03 11:39:37 +00003706 /* Oldref tree */
sewardjf98e1c02008-10-25 16:22:41 +00003707 tl_assert(!oldrefTree);
sewardjbc307e52008-12-06 22:10:54 +00003708 oldrefTree = VG_(newSWA)(
3709 HG_(zalloc),
sewardjd86e3a22008-12-03 11:39:37 +00003710 "libhb.event_map_init.4 (oldref tree)",
sewardjbc307e52008-12-06 22:10:54 +00003711 HG_(free)
sewardjf98e1c02008-10-25 16:22:41 +00003712 );
3713 tl_assert(oldrefTree);
3714
3715 oldrefGen = 0;
3716 oldrefGenIncAt = 0;
3717 oldrefTreeN = 0;
3718}
3719
3720static void event_map__check_reference_counts ( Bool before )
3721{
3722 RCEC* rcec;
3723 OldRef* oldref;
3724 Word i;
3725 UWord nEnts = 0;
sewardjd86e3a22008-12-03 11:39:37 +00003726 UWord keyW, valW;
sewardjf98e1c02008-10-25 16:22:41 +00003727
3728 /* Set the 'check' reference counts to zero. Also, optionally
3729 check that the real reference counts are non-zero. We allow
3730 these to fall to zero before a GC, but the GC must get rid of
3731 all those that are zero, hence none should be zero after a
3732 GC. */
3733 for (i = 0; i < N_RCEC_TAB; i++) {
3734 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3735 nEnts++;
3736 tl_assert(rcec);
3737 tl_assert(rcec->magic == RCEC_MAGIC);
3738 if (!before)
3739 tl_assert(rcec->rc > 0);
3740 rcec->rcX = 0;
3741 }
3742 }
3743
3744 /* check that the stats are sane */
3745 tl_assert(nEnts == stats__ctxt_tab_curr);
3746 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
3747
3748 /* visit all the referencing points, inc check ref counts */
sewardjbc307e52008-12-06 22:10:54 +00003749 VG_(initIterSWA)( oldrefTree );
3750 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003751 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003752 tl_assert(oldref->magic == OldRef_MAGIC);
3753 for (i = 0; i < N_OLDREF_ACCS; i++) {
sewardjc5ea9962008-12-07 01:41:46 +00003754 Thr* aThr = ptr_and_UWord(oldref->accs[i].thr, ~3);
3755 RCEC* aRef = ptr_and_UWord(oldref->accs[i].rcec, ~3);
3756 if (aThr) {
3757 tl_assert(aRef);
3758 tl_assert(aRef->magic == RCEC_MAGIC);
3759 aRef->rcX++;
sewardjf98e1c02008-10-25 16:22:41 +00003760 } else {
sewardjc5ea9962008-12-07 01:41:46 +00003761 tl_assert(!aRef);
sewardjf98e1c02008-10-25 16:22:41 +00003762 }
3763 }
3764 }
3765
3766 /* compare check ref counts with actual */
3767 for (i = 0; i < N_RCEC_TAB; i++) {
3768 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3769 tl_assert(rcec->rc == rcec->rcX);
3770 }
3771 }
3772}
3773
sewardj8fd92d32008-11-20 23:17:01 +00003774__attribute__((noinline))
sewardjf98e1c02008-10-25 16:22:41 +00003775static void event_map_maybe_GC ( void )
3776{
3777 OldRef* oldref;
3778 UWord keyW, valW, retained, maxGen;
sewardjf98e1c02008-10-25 16:22:41 +00003779 XArray* refs2del;
3780 Word i, j, n2del;
3781
sewardj8fd92d32008-11-20 23:17:01 +00003782 UWord* genMap = NULL;
3783 UWord genMap_min = 0;
3784 UWord genMap_size = 0;
3785
sewardj849b0ed2008-12-21 10:43:10 +00003786 if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
sewardjf98e1c02008-10-25 16:22:41 +00003787 return;
3788
3789 if (0)
3790 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3791
sewardj849b0ed2008-12-21 10:43:10 +00003792 /* Check for sane command line params. Limit values must match
3793 those in hg_process_cmd_line_option. */
3794 tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
3795 tl_assert( HG_(clo_conflict_cache_size) <= 10*1000*1000 );
3796
sewardj8f5374e2008-12-07 11:40:17 +00003797 /* Check our counting is sane (expensive) */
3798 if (CHECK_CEM)
3799 tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
sewardjf98e1c02008-10-25 16:22:41 +00003800
sewardj8f5374e2008-12-07 11:40:17 +00003801 /* Check the reference counts (expensive) */
3802 if (CHECK_CEM)
3803 event_map__check_reference_counts( True/*before*/ );
sewardjf98e1c02008-10-25 16:22:41 +00003804
sewardj8fd92d32008-11-20 23:17:01 +00003805 /* Compute the distribution of generation values in the ref tree.
3806      There are likely to be only a few different generation numbers
3807 in the whole tree, but we don't know what they are. Hence use a
3808 dynamically resized array of counters. The array is genMap[0
3809 .. genMap_size-1], where genMap[0] is the count for the
3810 generation number genMap_min, genMap[1] is the count for
3811 genMap_min+1, etc. If a new number is seen outside the range
3812 [genMap_min .. genMap_min + genMap_size - 1] then the array is
3813 copied into a larger array, and genMap_min and genMap_size are
3814 adjusted accordingly. */
3815
sewardjf98e1c02008-10-25 16:22:41 +00003816 /* genMap :: generation-number -> count-of-nodes-with-that-number */
sewardjf98e1c02008-10-25 16:22:41 +00003817
sewardjbc307e52008-12-06 22:10:54 +00003818 VG_(initIterSWA)( oldrefTree );
3819 while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj8fd92d32008-11-20 23:17:01 +00003820
sewardjd86e3a22008-12-03 11:39:37 +00003821 UWord ea, key;
3822 oldref = (OldRef*)valW;
3823 key = oldref->gen;
sewardj8fd92d32008-11-20 23:17:01 +00003824
3825 /* BEGIN find 'ea', which is the index in genMap holding the
3826 count for generation number 'key'. */
3827 if (UNLIKELY(genMap == NULL)) {
3828 /* deal with the first key to be seen, so that the following
3829 cases don't need to handle the complexity of a NULL count
3830 array. */
3831 genMap_min = key;
3832 genMap_size = 1;
3833 genMap = HG_(zalloc)( "libhb.emmG.1a",
3834 genMap_size * sizeof(UWord) );
3835 ea = 0;
3836 if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
3837                          key, genMap_min, genMap_min+genMap_size-1 );
sewardjf98e1c02008-10-25 16:22:41 +00003838 }
sewardj8fd92d32008-11-20 23:17:01 +00003839 else
3840 if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
3841 /* this is the expected (almost-always-happens) case: 'key'
3842 is already mapped in the array. */
3843 ea = key - genMap_min;
3844 }
3845 else
3846 if (key < genMap_min) {
3847 /* 'key' appears before the start of the current array.
3848 Extend the current array by allocating a larger one and
3849 copying the current one to the upper end of it. */
3850 Word more;
3851 UWord* map2;
3852 more = genMap_min - key;
3853 tl_assert(more > 0);
3854 map2 = HG_(zalloc)( "libhb.emmG.1b",
3855 (genMap_size + more) * sizeof(UWord) );
3856 VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
3857 HG_(free)( genMap );
3858 genMap = map2;
3859 genMap_size += more;
3860 genMap_min -= more;
3861 ea = 0;
3862 tl_assert(genMap_min == key);
3863 if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
3864                          key, genMap_min, genMap_min+genMap_size-1 );
3865 }
3866 else {
3867 /* 'key' appears after the end of the current array. Extend
3868 the current array by allocating a larger one and copying
3869 the current one to the lower end of it. */
3870 Word more;
3871 UWord* map2;
3872 tl_assert(key >= genMap_min + genMap_size);
3873 more = key - (genMap_min + genMap_size) + 1;
3874 tl_assert(more > 0);
3875 map2 = HG_(zalloc)( "libhb.emmG.1c",
3876 (genMap_size + more) * sizeof(UWord) );
3877 VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
3878 HG_(free)( genMap );
3879 genMap = map2;
3880 genMap_size += more;
3881         ea = genMap_size - 1;
3882 tl_assert(genMap_min + genMap_size - 1 == key);
3883 if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
3884                          key, genMap_min, genMap_min+genMap_size-1 );
3885 }
3886 /* END find 'ea' from 'key' */
3887
3888 tl_assert(ea >= 0 && ea < genMap_size);
sewardjd86e3a22008-12-03 11:39:37 +00003889 /* and the whole point of this elaborate computation of 'ea' is .. */
sewardj8fd92d32008-11-20 23:17:01 +00003890 genMap[ea]++;
sewardjf98e1c02008-10-25 16:22:41 +00003891 }
3892
sewardj8fd92d32008-11-20 23:17:01 +00003893 tl_assert(genMap);
3894 tl_assert(genMap_size > 0);
sewardjf98e1c02008-10-25 16:22:41 +00003895
sewardj8fd92d32008-11-20 23:17:01 +00003896 /* Sanity check what we just computed */
3897 { UWord sum = 0;
3898 for (i = 0; i < genMap_size; i++) {
3899 if (0) VG_(printf)(" xxx: gen %ld has %lu\n",
3900 i + genMap_min, genMap[i] );
3901 sum += genMap[i];
3902 }
3903 tl_assert(sum == oldrefTreeN);
3904 }
3905
3906 /* Figure out how many generations to throw away */
sewardjf98e1c02008-10-25 16:22:41 +00003907 retained = oldrefTreeN;
3908 maxGen = 0;
sewardj8fd92d32008-11-20 23:17:01 +00003909
3910 for (i = 0; i < genMap_size; i++) {
3911 keyW = i + genMap_min;
3912 valW = genMap[i];
sewardjf98e1c02008-10-25 16:22:41 +00003913 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3914 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3915 tl_assert(keyW >= maxGen);
3916 tl_assert(retained >= valW);
3917 if (retained - valW
sewardj849b0ed2008-12-21 10:43:10 +00003918 > (UWord)(HG_(clo_conflict_cache_size)
3919 * EVENT_MAP_GC_DISCARD_FRACTION)) {
sewardjf98e1c02008-10-25 16:22:41 +00003920 retained -= valW;
3921 maxGen = keyW;
3922 } else {
3923 break;
3924 }
3925 }
sewardjf98e1c02008-10-25 16:22:41 +00003926
sewardj8fd92d32008-11-20 23:17:01 +00003927 HG_(free)(genMap);
sewardjf98e1c02008-10-25 16:22:41 +00003928
sewardj9b1f0fd2008-11-18 23:40:00 +00003929 tl_assert(retained >= 0 && retained <= oldrefTreeN);
sewardjf98e1c02008-10-25 16:22:41 +00003930
3931 /* Now make up a big list of the oldrefTree entries we want to
3932 delete. We can't simultaneously traverse the tree and delete
3933 stuff from it, so first we need to copy them off somewhere
3934 else. (sigh) */
sewardj8fd92d32008-11-20 23:17:01 +00003935 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
sewardjd86e3a22008-12-03 11:39:37 +00003936 HG_(free), sizeof(Addr) );
sewardjf98e1c02008-10-25 16:22:41 +00003937
sewardj9b1f0fd2008-11-18 23:40:00 +00003938 if (retained < oldrefTreeN) {
3939
3940 /* This is the normal (expected) case. We discard any ref whose
3941 generation number <= maxGen. */
sewardjbc307e52008-12-06 22:10:54 +00003942 VG_(initIterSWA)( oldrefTree );
3943 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardjd86e3a22008-12-03 11:39:37 +00003944 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003945 tl_assert(oldref->magic == OldRef_MAGIC);
3946 if (oldref->gen <= maxGen) {
sewardjd86e3a22008-12-03 11:39:37 +00003947 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003948 }
sewardjf98e1c02008-10-25 16:22:41 +00003949 }
sewardj9b1f0fd2008-11-18 23:40:00 +00003950 if (VG_(clo_verbosity) > 1) {
3951 VG_(message)(Vg_DebugMsg,
3952 "libhb: EvM GC: delete generations %lu and below, "
sewardj24118492009-07-15 14:50:02 +00003953 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003954 maxGen, retained );
3955 }
3956
3957 } else {
3958
3959 static UInt rand_seed = 0; /* leave as static */
3960
3961 /* Degenerate case: there's only one generation in the entire
3962 tree, so we need to have some other way of deciding which
3963 refs to throw away. Just throw out half of them randomly. */
3964 tl_assert(retained == oldrefTreeN);
sewardjbc307e52008-12-06 22:10:54 +00003965 VG_(initIterSWA)( oldrefTree );
3966 while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
sewardj9b1f0fd2008-11-18 23:40:00 +00003967 UInt n;
sewardjd86e3a22008-12-03 11:39:37 +00003968 oldref = (OldRef*)valW;
sewardj9b1f0fd2008-11-18 23:40:00 +00003969 tl_assert(oldref->magic == OldRef_MAGIC);
3970 n = VG_(random)( &rand_seed );
3971 if ((n & 0xFFF) < 0x800) {
sewardjd86e3a22008-12-03 11:39:37 +00003972 VG_(addToXA)( refs2del, &keyW );
sewardj9b1f0fd2008-11-18 23:40:00 +00003973 retained--;
3974 }
3975 }
3976 if (VG_(clo_verbosity) > 1) {
3977 VG_(message)(Vg_DebugMsg,
3978 "libhb: EvM GC: randomly delete half the entries, "
sewardj24118492009-07-15 14:50:02 +00003979 "retaining %lu entries\n",
sewardj9b1f0fd2008-11-18 23:40:00 +00003980 retained );
3981 }
3982
sewardjf98e1c02008-10-25 16:22:41 +00003983 }
3984
3985 n2del = VG_(sizeXA)( refs2del );
3986 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3987
3988 if (0) VG_(printf)("%s","deleting entries\n");
3989 for (i = 0; i < n2del; i++) {
sewardjd86e3a22008-12-03 11:39:37 +00003990 Bool b;
3991 Addr ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
sewardjbc307e52008-12-06 22:10:54 +00003992 b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
sewardjd86e3a22008-12-03 11:39:37 +00003993 tl_assert(b);
3994 tl_assert(keyW == ga2del);
3995 oldref = (OldRef*)valW;
sewardjf98e1c02008-10-25 16:22:41 +00003996 for (j = 0; j < N_OLDREF_ACCS; j++) {
sewardjc5ea9962008-12-07 01:41:46 +00003997 Thr* aThr = ptr_and_UWord(oldref->accs[j].thr, ~3);
3998 RCEC* aRef = ptr_and_UWord(oldref->accs[j].rcec, ~3);
3999 if (aRef) {
4000 tl_assert(aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004001 stats__ctxt_rcdec3++;
sewardjc5ea9962008-12-07 01:41:46 +00004002 ctxt__rcdec( aRef );
sewardjf98e1c02008-10-25 16:22:41 +00004003 } else {
sewardjc5ea9962008-12-07 01:41:46 +00004004 tl_assert(!aThr);
sewardjf98e1c02008-10-25 16:22:41 +00004005 }
4006 }
sewardjd86e3a22008-12-03 11:39:37 +00004007
4008 free_OldRef( oldref );
sewardjf98e1c02008-10-25 16:22:41 +00004009 }
4010
4011 VG_(deleteXA)( refs2del );
4012
sewardjc5ea9962008-12-07 01:41:46 +00004013 tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
sewardjf98e1c02008-10-25 16:22:41 +00004014
4015 oldrefTreeN = retained;
4016 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4017
4018 /* Throw away all RCECs with zero reference counts */
4019 for (i = 0; i < N_RCEC_TAB; i++) {
4020 RCEC** pp = &contextTab[i];
4021 RCEC* p = *pp;
4022 while (p) {
4023 if (p->rc == 0) {
4024 *pp = p->next;
sewardjd86e3a22008-12-03 11:39:37 +00004025 free_RCEC(p);
sewardjf98e1c02008-10-25 16:22:41 +00004026 p = *pp;
4027 tl_assert(stats__ctxt_tab_curr > 0);
4028 stats__ctxt_tab_curr--;
4029 } else {
4030 pp = &p->next;
4031 p = p->next;
4032 }
4033 }
4034 }
4035
sewardj8f5374e2008-12-07 11:40:17 +00004036 /* Check the reference counts (expensive) */
4037 if (CHECK_CEM)
4038 event_map__check_reference_counts( False/*after*/ );
sewardjf98e1c02008-10-25 16:22:41 +00004039
4040 //if (0)
4041 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4042 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4043
4044}
4045
4046
4047/////////////////////////////////////////////////////////
4048// //
4049// Core MSM //
4050// //
4051/////////////////////////////////////////////////////////
4052
sewardj23f12002009-07-24 08:45:08 +00004053/* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4054 Nov 08, and again after [...],
4055 June 09. */
sewardjb0e009d2008-11-19 16:35:15 +00004056
sewardj23f12002009-07-24 08:45:08 +00004057static ULong stats__msmcread = 0;
4058static ULong stats__msmcread_change = 0;
4059static ULong stats__msmcwrite = 0;
4060static ULong stats__msmcwrite_change = 0;
sewardjf98e1c02008-10-25 16:22:41 +00004061
sewardj8ab2c132009-08-02 09:34:35 +00004062/* Some notes on the H1 history mechanism:
4063
4064 Transition rules are:
4065
4066 read_{Kr,Kw}(Cr,Cw) = (Cr, Cr `join` Kw)
4067 write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4068
4069 After any access by a thread T to a location L, L's constraint pair
4070 (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4071
4072   After a race by thread T conflicting with some previous access by
4073   some other thread U, for a location whose constraint (taken
4074   before processing the later access) is (Cr,Cw), Cw[U] is the
4075   segment in which the previous access lies.
4076
4077 Hence in record_race_info, we pass in Cfailed and Kfailed, which
4078 are compared so as to find out which thread(s) this access
4079 conflicts with. Once that is established, we also require the
4080 pre-update Cw for the location, so we can index into it for those
4081 threads, to get the scalar clock values for the point at which the
4082 former accesses were made. (In fact we only bother to do any of
4083   this for an arbitrarily chosen one of the conflicting threads:
4084   that's simpler, it avoids flooding the user with vast amounts of
4085   mostly useless information, and the program is wrong anyway if it
4086   contains any races at all -- so we don't really need to show all
4087   conflicting access pairs initially, so long as we show none only
4088   if none exist).
4089
4090 ---
4091
4092 That requires the auxiliary proof that
4093
4094 (Cr `join` Kw)[T] == Kw[T]
4095
4096 Why should that be true? Because for any thread T, Kw[T] >= the
4097 scalar clock value for T known by any other thread. In other
4098 words, because T's value for its own scalar clock is at least as up
4099 to date as the value for it known by any other thread (that is true
4100 for both the R- and W- scalar clocks). Hence no other thread will
4101 be able to feed in a value for that element (indirectly via a
4102 constraint) which will exceed Kw[T], and hence the join cannot
4103 cause that particular element to advance.
4104*/
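/* Worked example (added; the two-element clock vectors are made up).
   Write vector clocks as [t,u] for threads T and U.

      Location L starts with constraint Cr = Cw = [0,0].
      T, with Kr = Kw = [3,0], writes L:
         check: Cw `leq` T's Kw?  [0,0] <= [3,0] -- yes, no race;
         new constraint: (Cr,Cw) = ([3,0],[3,0]).
      U, with Kr = Kw = [0,5], then reads L:
         check: Cr `leq` U's Kr?  [3,0] <= [0,5] -- no: a race.
         Comparing Cfailed = [3,0] against Kfailed = [0,5] identifies
         T as the conflicting thread, and Cfailed[T] == 3 is T's
         scalar clock at the time of the earlier write -- exactly the
         confTym value that record_race_info below uses to index
         T's local_Kws_n_stacks. */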
4105
sewardjf98e1c02008-10-25 16:22:41 +00004106__attribute__((noinline))
4107static void record_race_info ( Thr* acc_thr,
sewardj23f12002009-07-24 08:45:08 +00004108 Addr acc_addr, SizeT szB, Bool isWrite,
sewardj8ab2c132009-08-02 09:34:35 +00004109 VtsID Cfailed,
4110 VtsID Kfailed,
4111 VtsID Cw )
sewardjf98e1c02008-10-25 16:22:41 +00004112{
sewardjc5ea9962008-12-07 01:41:46 +00004113 /* Call here to report a race. We just hand it onwards to
4114 HG_(record_error_Race). If that in turn discovers that the
sewardj23f12002009-07-24 08:45:08 +00004115 error is going to be collected, then, at history_level 2, that
4116 queries the conflicting-event map. The alternative would be to
4117 query it right here. But that causes a lot of pointless queries
4118 for errors which will shortly be discarded as duplicates, and
4119 can become a performance overhead; so we defer the query until
4120 we know the error is not a duplicate. */
4121
4122 /* Stacks for the bounds of the (or one of the) conflicting
4123 segment(s). These are only set at history_level 1. */
4124 ExeContext* hist1_seg_start = NULL;
4125 ExeContext* hist1_seg_end = NULL;
4126 Thread* hist1_conf_thr = NULL;
4127
4128 tl_assert(acc_thr);
sewardjc5ea9962008-12-07 01:41:46 +00004129 tl_assert(acc_thr->opaque);
sewardj23f12002009-07-24 08:45:08 +00004130 tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4131
4132 if (HG_(clo_history_level) == 1) {
4133 Bool found;
4134 Word firstIx, lastIx;
4135 ULong_n_EC key;
4136
4137 /* At history_level 1, we must round up the relevant stack-pair
4138 for the conflicting segment right now. This is because
sewardj8ab2c132009-08-02 09:34:35 +00004139 deferring it is complex; we can't (easily) put Kfailed and
4140 Cfailed into the XError and wait for later without
sewardj23f12002009-07-24 08:45:08 +00004141 getting tied up in difficulties with VtsID reference
4142 counting. So just do it now. */
4143 Thr* confThr;
4144 ULong confTym = 0;
4145 /* Which thread are we in conflict with? There may be more than
4146 one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4147 (in fact it's the one with the lowest Thr* value). */
sewardj8ab2c132009-08-02 09:34:35 +00004148 confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
sewardj23f12002009-07-24 08:45:08 +00004149      /* This must exist, since if it were NULL then there's no
sewardj8ab2c132009-08-02 09:34:35 +00004150 conflict (semantics of return value of
4151 VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4152 called us, just checked exactly this -- that there was in
4153 fact a race. */
sewardj23f12002009-07-24 08:45:08 +00004154 tl_assert(confThr);
4155
4156 /* Get the scalar clock value that the conflicting thread
4157 introduced into the constraint. A careful examination of the
4158 base machine rules shows that this must be the same as the
4159 conflicting thread's scalar clock when it created this
4160 constraint. Hence we know the scalar clock of the
4161 conflicting thread when the conflicting access was made. */
sewardj8ab2c132009-08-02 09:34:35 +00004162 confTym = VtsID__indexAt( Cfailed, confThr );
sewardj23f12002009-07-24 08:45:08 +00004163
4164 /* Using this scalar clock, index into the conflicting thread's
4165 collection of stack traces made each time its vector clock
4166 (hence its scalar clock) changed. This gives the stack
4167 traces at the start and end of the conflicting segment (well,
4168 as per comment just above, of one of the conflicting
4169 segments, if there are more than one). */
4170 key.ull = confTym;
4171 key.ec = NULL;
4172 /* tl_assert(confThr); -- asserted just above */
sewardj8ab2c132009-08-02 09:34:35 +00004173 tl_assert(confThr->local_Kws_n_stacks);
sewardj23f12002009-07-24 08:45:08 +00004174 firstIx = lastIx = 0;
4175 found = VG_(lookupXA_UNSAFE)(
sewardj8ab2c132009-08-02 09:34:35 +00004176 confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004177 &key, &firstIx, &lastIx,
4178 (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4179 );
sewardj8ab2c132009-08-02 09:34:35 +00004180 if (0) VG_(printf)("record_race_info %u %u %u confThr %p "
sewardj23f12002009-07-24 08:45:08 +00004181 "confTym %llu found %d (%lu,%lu)\n",
sewardj8ab2c132009-08-02 09:34:35 +00004182 Cfailed, Kfailed, Cw,
sewardj23f12002009-07-24 08:45:08 +00004183 confThr, confTym, found, firstIx, lastIx);
4184 /* We can't indefinitely collect stack traces at VTS
4185 transitions, since we'd eventually run out of memory. Hence
sewardj8ab2c132009-08-02 09:34:35 +00004186 note_local_Kw_n_stack_for will eventually throw away old
sewardj23f12002009-07-24 08:45:08 +00004187 ones, which in turn means we might fail to find index value
4188 confTym in the array. */
4189 if (found) {
4190 ULong_n_EC *pair_start, *pair_end;
4191 pair_start
sewardj8ab2c132009-08-02 09:34:35 +00004192 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
sewardj23f12002009-07-24 08:45:08 +00004193 hist1_seg_start = pair_start->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004194 if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
sewardj23f12002009-07-24 08:45:08 +00004195 pair_end
sewardj8ab2c132009-08-02 09:34:35 +00004196 = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
sewardj23f12002009-07-24 08:45:08 +00004197 lastIx+1 );
4198 /* from properties of VG_(lookupXA) and the comparison fn used: */
4199 tl_assert(pair_start->ull < pair_end->ull);
4200 hist1_seg_end = pair_end->ec;
sewardj8ab2c132009-08-02 09:34:35 +00004201 /* Could do a bit better here. It may be that pair_end
4202 doesn't have a stack, but the following entries in the
4203               array have the same scalar Kw and do have a stack.  So
4204 we should search a bit further along the array than
4205 lastIx+1 if hist1_seg_end is NULL. */
sewardj23f12002009-07-24 08:45:08 +00004206 } else {
4207 if (confThr->still_alive)
4208 hist1_seg_end = main_get_EC( confThr );
4209 }
4210 // seg_start could be NULL iff this is the first stack in the thread
4211 //if (seg_start) VG_(pp_ExeContext)(seg_start);
4212 //if (seg_end) VG_(pp_ExeContext)(seg_end);
4213 hist1_conf_thr = confThr->opaque;
4214 }
4215 }
4216
sewardjc5ea9962008-12-07 01:41:46 +00004217 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
sewardj23f12002009-07-24 08:45:08 +00004218 szB, isWrite,
4219 hist1_conf_thr, hist1_seg_start, hist1_seg_end );
sewardjf98e1c02008-10-25 16:22:41 +00004220}
4221
4222static Bool is_sane_SVal_C ( SVal sv ) {
sewardj23f12002009-07-24 08:45:08 +00004223 Bool leq;
sewardjf98e1c02008-10-25 16:22:41 +00004224 if (!SVal__isC(sv)) return True;
sewardj23f12002009-07-24 08:45:08 +00004225 leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4226 return leq;
sewardjf98e1c02008-10-25 16:22:41 +00004227}
4228
4229
4230/* Compute new state following a read */
sewardj23f12002009-07-24 08:45:08 +00004231static inline SVal msmcread ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004232 /* The following are only needed for
4233 creating error reports. */
4234 Thr* acc_thr,
4235 Addr acc_addr, SizeT szB )
4236{
4237 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004238 stats__msmcread++;
sewardjf98e1c02008-10-25 16:22:41 +00004239
4240 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004241 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004242 tl_assert(is_sane_SVal_C(svOld));
4243 }
4244
sewardj1c0ce7a2009-07-01 08:10:49 +00004245 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004246 VtsID tviR = acc_thr->viR;
4247 VtsID tviW = acc_thr->viW;
4248 VtsID rmini = SVal__unC_Rmin(svOld);
4249 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004250 Bool leq = VtsID__cmpLEQ(rmini,tviR);
4251 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004252 /* no race */
4253 /* Note: RWLOCK subtlety: use tviW, not tviR */
4254 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4255 goto out;
4256 } else {
sewardjb0e009d2008-11-19 16:35:15 +00004257 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004258 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4259 tl_assert(leqxx);
4260 // same as in non-race case
4261 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4262 record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004263 rmini, /* Cfailed */
4264 tviR, /* Kfailed */
4265 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004266 goto out;
4267 }
4268 }
4269 if (SVal__isA(svOld)) {
4270 /* reading no-access memory (sigh); leave unchanged */
4271 /* check for no pollution */
4272 tl_assert(svOld == SVal_NOACCESS);
4273 svNew = SVal_NOACCESS;
4274 goto out;
4275 }
sewardj23f12002009-07-24 08:45:08 +00004276 if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004277 tl_assert(0);
4278
4279 out:
sewardj8f5374e2008-12-07 11:40:17 +00004280 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004281 tl_assert(is_sane_SVal_C(svNew));
4282 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004283 if (UNLIKELY(svNew != svOld)) {
4284 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004285 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004286 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004287 event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004288 stats__msmcread_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004289 }
4290 }
4291 return svNew;
4292}
4293
4294
4295/* Compute new state following a write */
sewardj23f12002009-07-24 08:45:08 +00004296static inline SVal msmcwrite ( SVal svOld,
sewardjf98e1c02008-10-25 16:22:41 +00004297 /* The following are only needed for
4298 creating error reports. */
4299 Thr* acc_thr,
4300 Addr acc_addr, SizeT szB )
4301{
4302 SVal svNew = SVal_INVALID;
sewardj23f12002009-07-24 08:45:08 +00004303 stats__msmcwrite++;
sewardjf98e1c02008-10-25 16:22:41 +00004304
4305 /* Redundant sanity check on the constraints */
sewardj8f5374e2008-12-07 11:40:17 +00004306 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004307 tl_assert(is_sane_SVal_C(svOld));
4308 }
4309
sewardj1c0ce7a2009-07-01 08:10:49 +00004310 if (LIKELY(SVal__isC(svOld))) {
sewardjf98e1c02008-10-25 16:22:41 +00004311 VtsID tviW = acc_thr->viW;
4312 VtsID wmini = SVal__unC_Wmin(svOld);
sewardj23f12002009-07-24 08:45:08 +00004313 Bool leq = VtsID__cmpLEQ(wmini,tviW);
4314 if (LIKELY(leq)) {
sewardjf98e1c02008-10-25 16:22:41 +00004315 /* no race */
4316 svNew = SVal__mkC( tviW, tviW );
4317 goto out;
4318 } else {
4319 VtsID rmini = SVal__unC_Rmin(svOld);
sewardjb0e009d2008-11-19 16:35:15 +00004320 /* assert on sanity of constraints. */
sewardj23f12002009-07-24 08:45:08 +00004321 Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4322 tl_assert(leqxx);
4323 // same as in non-race case
4324 // proof: in the non-race case, we have
4325 // rmini <= wmini (invar on constraints)
4326 // tviW <= tviR (invar on thread clocks)
4327 // wmini <= tviW (from run-time check)
4328 // hence from transitivity of <= we have
4329 // rmini <= wmini <= tviW
4330 // and so join(rmini,tviW) == tviW
4331 // and join(wmini,tviW) == tviW
4332 // qed.
4333 svNew = SVal__mkC( VtsID__join2(rmini, tviW),
4334 VtsID__join2(wmini, tviW) );
4335 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
sewardj8ab2c132009-08-02 09:34:35 +00004336 wmini, /* Cfailed */
4337 tviW, /* Kfailed */
4338 wmini /* Cw */ );
sewardjf98e1c02008-10-25 16:22:41 +00004339 goto out;
4340 }
4341 }
4342 if (SVal__isA(svOld)) {
4343 /* writing no-access memory (sigh); leave unchanged */
4344 /* check for no pollution */
4345 tl_assert(svOld == SVal_NOACCESS);
4346 svNew = SVal_NOACCESS;
4347 goto out;
4348 }
sewardj23f12002009-07-24 08:45:08 +00004349 if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
sewardjf98e1c02008-10-25 16:22:41 +00004350 tl_assert(0);
4351
4352 out:
sewardj8f5374e2008-12-07 11:40:17 +00004353 if (CHECK_MSM) {
sewardjf98e1c02008-10-25 16:22:41 +00004354 tl_assert(is_sane_SVal_C(svNew));
4355 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004356 if (UNLIKELY(svNew != svOld)) {
4357 tl_assert(svNew != SVal_INVALID);
sewardj23f12002009-07-24 08:45:08 +00004358 if (HG_(clo_history_level) >= 2
sewardj1c0ce7a2009-07-01 08:10:49 +00004359 && SVal__isC(svOld) && SVal__isC(svNew)) {
sewardjc5ea9962008-12-07 01:41:46 +00004360 event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
sewardj23f12002009-07-24 08:45:08 +00004361 stats__msmcwrite_change++;
sewardjf98e1c02008-10-25 16:22:41 +00004362 }
4363 }
4364 return svNew;
4365}
4366
4367
4368/////////////////////////////////////////////////////////
4369// //
4370// Apply core MSM to specific memory locations //
4371// //
4372/////////////////////////////////////////////////////////
4373
sewardj23f12002009-07-24 08:45:08 +00004374/*------------- ZSM accesses: 8 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004375
sewardj23f12002009-07-24 08:45:08 +00004376static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004377 CacheLine* cl;
4378 UWord cloff, tno, toff;
4379 SVal svOld, svNew;
4380 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004381 stats__cline_cread08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004382 cl = get_cacheline(a);
4383 cloff = get_cacheline_offset(a);
4384 tno = get_treeno(a);
4385 toff = get_tree_offset(a); /* == 0 .. 7 */
4386 descr = cl->descrs[tno];
4387 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4388 SVal* tree = &cl->svals[tno << 3];
4389 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004390 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004391 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4392 }
4393 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004394 svNew = msmcread( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004395 if (CHECK_ZSM)
4396 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004397 cl->svals[cloff] = svNew;
4398}
4399
sewardj23f12002009-07-24 08:45:08 +00004400static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004401 CacheLine* cl;
4402 UWord cloff, tno, toff;
4403 SVal svOld, svNew;
4404 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004405 stats__cline_cwrite08s++;
sewardjf98e1c02008-10-25 16:22:41 +00004406 cl = get_cacheline(a);
4407 cloff = get_cacheline_offset(a);
4408 tno = get_treeno(a);
4409 toff = get_tree_offset(a); /* == 0 .. 7 */
4410 descr = cl->descrs[tno];
4411 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
4412 SVal* tree = &cl->svals[tno << 3];
4413 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
sewardj8f5374e2008-12-07 11:40:17 +00004414 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004415 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4416 }
4417 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004418 svNew = msmcwrite( svOld, thr,a,1 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004419 if (CHECK_ZSM)
4420 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004421 cl->svals[cloff] = svNew;
4422}
4423
sewardj23f12002009-07-24 08:45:08 +00004424/*------------- ZSM accesses: 16 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004425
sewardj23f12002009-07-24 08:45:08 +00004426static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004427 CacheLine* cl;
4428 UWord cloff, tno, toff;
4429 SVal svOld, svNew;
4430 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004431 stats__cline_cread16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004432 if (UNLIKELY(!aligned16(a))) goto slowcase;
4433 cl = get_cacheline(a);
4434 cloff = get_cacheline_offset(a);
4435 tno = get_treeno(a);
4436 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4437 descr = cl->descrs[tno];
4438 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4439 if (valid_value_is_below_me_16(descr, toff)) {
4440 goto slowcase;
4441 } else {
4442 SVal* tree = &cl->svals[tno << 3];
4443 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4444 }
sewardj8f5374e2008-12-07 11:40:17 +00004445 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004446 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4447 }
4448 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004449 svNew = msmcread( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004450 if (CHECK_ZSM)
4451 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004452 cl->svals[cloff] = svNew;
4453 return;
4454 slowcase: /* misaligned, or must go further down the tree */
4455 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004456 zsm_sapply08__msmcread( thr, a + 0 );
4457 zsm_sapply08__msmcread( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004458}
4459
sewardj23f12002009-07-24 08:45:08 +00004460static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004461 CacheLine* cl;
4462 UWord cloff, tno, toff;
4463 SVal svOld, svNew;
4464 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004465 stats__cline_cwrite16s++;
sewardjf98e1c02008-10-25 16:22:41 +00004466 if (UNLIKELY(!aligned16(a))) goto slowcase;
4467 cl = get_cacheline(a);
4468 cloff = get_cacheline_offset(a);
4469 tno = get_treeno(a);
4470 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
4471 descr = cl->descrs[tno];
4472 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
4473 if (valid_value_is_below_me_16(descr, toff)) {
4474 goto slowcase;
4475 } else {
4476 SVal* tree = &cl->svals[tno << 3];
4477 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
4478 }
sewardj8f5374e2008-12-07 11:40:17 +00004479 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004480 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4481 }
4482 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004483 svNew = msmcwrite( svOld, thr,a,2 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004484 if (CHECK_ZSM)
4485 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004486 cl->svals[cloff] = svNew;
4487 return;
4488 slowcase: /* misaligned, or must go further down the tree */
4489 stats__cline_16to8splits++;
sewardj23f12002009-07-24 08:45:08 +00004490 zsm_sapply08__msmcwrite( thr, a + 0 );
4491 zsm_sapply08__msmcwrite( thr, a + 1 );
sewardjf98e1c02008-10-25 16:22:41 +00004492}
4493
sewardj23f12002009-07-24 08:45:08 +00004494/*------------- ZSM accesses: 32 bit sapply ------------- */
sewardjf98e1c02008-10-25 16:22:41 +00004495
sewardj23f12002009-07-24 08:45:08 +00004496static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004497 CacheLine* cl;
4498 UWord cloff, tno, toff;
4499 SVal svOld, svNew;
4500 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004501 stats__cline_cread32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004502 if (UNLIKELY(!aligned32(a))) goto slowcase;
4503 cl = get_cacheline(a);
4504 cloff = get_cacheline_offset(a);
4505 tno = get_treeno(a);
4506 toff = get_tree_offset(a); /* == 0 or 4 */
4507 descr = cl->descrs[tno];
4508 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
4509 if (valid_value_is_above_me_32(descr, toff)) {
4510 SVal* tree = &cl->svals[tno << 3];
4511 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
4512 } else {
4513 goto slowcase;
4514 }
sewardj8f5374e2008-12-07 11:40:17 +00004515 if (CHECK_ZSM)
sewardjf98e1c02008-10-25 16:22:41 +00004516 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
4517 }
4518 svOld = cl->svals[cloff];
sewardj23f12002009-07-24 08:45:08 +00004519 svNew = msmcread( svOld, thr,a,4 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004520 if (CHECK_ZSM)
4521 tl_assert(svNew != SVal_INVALID);
sewardjf98e1c02008-10-25 16:22:41 +00004522 cl->svals[cloff] = svNew;
4523 return;
4524 slowcase: /* misaligned, or must go further down the tree */
4525 stats__cline_32to16splits++;
sewardj23f12002009-07-24 08:45:08 +00004526 zsm_sapply16__msmcread( thr, a + 0 );
4527 zsm_sapply16__msmcread( thr, a + 2 );
sewardjf98e1c02008-10-25 16:22:41 +00004528}
4529
sewardj23f12002009-07-24 08:45:08 +00004530static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
sewardjf98e1c02008-10-25 16:22:41 +00004531 CacheLine* cl;
4532 UWord cloff, tno, toff;
4533 SVal svOld, svNew;
4534 UShort descr;
sewardj23f12002009-07-24 08:45:08 +00004535 stats__cline_cwrite32s++;
sewardjf98e1c02008-10-25 16:22:41 +00004536 if (UNLIKELY(!aligned32(a))) goto slowcase;
4537 cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
      } else {
         goto slowcase;
      }
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr, a, 4 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_32to16splits++;
   zsm_sapply16__msmcwrite( thr, a + 0 );
   zsm_sapply16__msmcwrite( thr, a + 2 );
}

/*------------- ZSM accesses: 64 bit sapply ------------- */

static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cread64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcread( svOld, thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcread( thr, a + 0 );
   zsm_sapply32__msmcread( thr, a + 4 );
}

static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   SVal svOld, svNew;
   UShort descr;
   stats__cline_cwrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
      goto slowcase;
   }
   svOld = cl->svals[cloff];
   svNew = msmcwrite( svOld, thr, a, 8 );
   if (CHECK_ZSM)
      tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
   return;
  slowcase: /* misaligned, or must go further down the tree */
   stats__cline_64to32splits++;
   zsm_sapply32__msmcwrite( thr, a + 0 );
   zsm_sapply32__msmcwrite( thr, a + 4 );
}
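
/* Note on the slowcases above: a misaligned access degrades one size
   level per call.  For example, an 8-byte access at 0x1002 becomes
   4-byte accesses at 0x1002 and 0x1006; each of those is itself
   misaligned and splits again, so the work is finally done as 2-byte
   accesses at 0x1002, 0x1004, 0x1006 and 0x1008. */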

/*--------------- ZSM accesses: 8 bit swrite --------------- */

static
void zsm_swrite08 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite08s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
      if (CHECK_ZSM)
         tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff] = svNew;
}

/*--------------- ZSM accesses: 16 bit swrite --------------- */

static
void zsm_swrite16 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite16s++;
   if (UNLIKELY(!aligned16(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
      if (valid_value_is_below_me_16(descr, toff)) {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_16(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      } else {
         /* We can't indiscriminately write on the w16 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_16to8splits++;
   zsm_swrite08( a + 0, svNew );
   zsm_swrite08( a + 1, svNew );
}

/*--------------- ZSM accesses: 32 bit swrite --------------- */

static
void zsm_swrite32 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_swrite32s++;
   if (UNLIKELY(!aligned32(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 or 4 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
      if (valid_value_is_above_me_32(descr, toff)) {
         /* We can't indiscriminately write on the w32 node as in the
            w64 case, as that might make the node inconsistent with
            its parent.  So first, pull down to this level. */
         SVal* tree = &cl->svals[tno << 3];
         cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
         if (CHECK_ZSM)
            tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
      } else {
         /* Writing at this level.  Need to fix up 'descr'. */
         cl->descrs[tno] = pullup_descr_to_32(descr, toff);
         /* At this point, the tree does not match cl->descrs[tno] any
            more.  The assignments below will fix it up. */
      }
   }
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_32to16splits++;
   zsm_swrite16( a + 0, svNew );
   zsm_swrite16( a + 2, svNew );
}

/*--------------- ZSM accesses: 64 bit swrite --------------- */

static
void zsm_swrite64 ( Addr a, SVal svNew ) {
   CacheLine* cl;
   UWord cloff, tno;
   //UWord toff;
   stats__cline_swrite64s++;
   if (UNLIKELY(!aligned64(a))) goto slowcase;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   //toff = get_tree_offset(a); /* == 0, unused */
   cl->descrs[tno] = TREE_DESCR_64;
   tl_assert(svNew != SVal_INVALID);
   cl->svals[cloff + 0] = svNew;
   cl->svals[cloff + 1] = SVal_INVALID;
   cl->svals[cloff + 2] = SVal_INVALID;
   cl->svals[cloff + 3] = SVal_INVALID;
   cl->svals[cloff + 4] = SVal_INVALID;
   cl->svals[cloff + 5] = SVal_INVALID;
   cl->svals[cloff + 6] = SVal_INVALID;
   cl->svals[cloff + 7] = SVal_INVALID;
   return;
  slowcase: /* misaligned */
   stats__cline_64to32splits++;
   zsm_swrite32( a + 0, svNew );
   zsm_swrite32( a + 4, svNew );
}

/*------------- ZSM accesses: 8 bit sread/scopy ------------- */

static
SVal zsm_sread08 ( Addr a ) {
   CacheLine* cl;
   UWord cloff, tno, toff;
   UShort descr;
   stats__cline_sread08s++;
   cl = get_cacheline(a);
   cloff = get_cacheline_offset(a);
   tno = get_treeno(a);
   toff = get_tree_offset(a); /* == 0 .. 7 */
   descr = cl->descrs[tno];
   if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
      SVal* tree = &cl->svals[tno << 3];
      cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   }
   return cl->svals[cloff];
}

static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   SVal sv;
   stats__cline_scopy08s++;
   sv = zsm_sread08( src );
   zsm_swrite08( dst, sv );
}


/* Block-copy states (needed for implementing realloc()).  Note this
   doesn't change the filtering arrangements.  The caller of
   zsm_scopy_range needs to attend to that. */

static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
{
   SizeT i;
   if (len == 0)
      return;

   /* assert for non-overlappingness */
   tl_assert(src+len <= dst || dst+len <= src);

   /* To be simple, just copy byte by byte.  But so as not to wreck
      performance for later accesses to dst[0 .. len-1], normalise
      destination lines as we finish with them, and also normalise the
      lines containing the first and last addresses of the range. */
   for (i = 0; i < len; i++) {
      Bool normalise
         = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
           || i == 0       /* first in range */
           || i == len-1;  /* last in range */
      zsm_scopy08( src+i, dst+i, normalise );
   }
}
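
/* Minimal usage sketch: a caller implementing a moving realloc() would
   copy the payload itself and then mirror that in shadow memory with
      zsm_scopy_range( old_ga, new_ga, len );
      Filter__clear_range( thr->filter, new_ga, len );
   (old_ga/new_ga/len are hypothetical names).  libhb_copy_shadow_state,
   near the end of this file, packages up exactly this pair of calls. */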


/* For setting address ranges to a given value.  Has considerable
   sophistication so as to avoid generating large numbers of pointless
   cache loads/writebacks for large ranges. */

/* Do small ranges in-cache, in the obvious way. */
static
void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_swrite32( a, svNew );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_swrite64( a, svNew );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_swrite08( a, svNew );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_swrite64( a, svNew );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_swrite32( a, svNew );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_swrite16( a, svNew );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_swrite08( a, svNew );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}
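
/* Worked example of the alignment ladder above: a call with a = 0x1001
   and len = 15 is carried out as an 8-bit write at 0x1001, a 16-bit
   write at 0x1002, a 32-bit write at 0x1004 and a 64-bit write at
   0x1008: 1 + 2 + 4 + 8 == 15 bytes, with every write aligned. */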


/* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   for larger ranges, try to operate directly on the out-of-cache
   representation, rather than dragging lines into the cache,
   overwriting them, and forcing them out.  This turns out to be an
   important performance optimisation.

   Note that this doesn't change the filtering arrangements.  The
   caller of zsm_sset_range needs to attend to that. */

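/* Splitting sketch (assuming a 64-byte line, ie N_LINE_ARANGE == 64):
   a call zsm_sset_range(0x1005, 0x200, sv) is decomposed as
      before:  0x1005 .. 0x103F   59 bytes, done in-cache
      aligned: 0x1040 .. 0x11FF   7 whole lines, done on the Z reps
      after:   0x1200 .. 0x1204   5 bytes, done in-cache
   matching the before/aligned/after arithmetic below. */
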
static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
{
   tl_assert(svNew != SVal_INVALID);
   stats__cache_make_New_arange += (ULong)len;

   if (0 && len > 500)
      VG_(printf)("make New ( %#lx, %ld )\n", a, len );

   if (0) {
      static UWord n_New_in_cache = 0;
      static UWord n_New_not_in_cache = 0;
      /* tag is 'a' with the in-line offset masked out,
         eg a[31]..a[4] 0000 */
      Addr tag = a & ~(N_LINE_ARANGE - 1);
      UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
      if (LIKELY(tag == cache_shmem.tags0[wix])) {
         n_New_in_cache++;
      } else {
         n_New_not_in_cache++;
      }
      if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
         VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
                     n_New_in_cache, n_New_not_in_cache );
   }

   if (LIKELY(len < 2 * N_LINE_ARANGE)) {
      zsm_sset_range_SMALL( a, len, svNew );
   } else {
      Addr before_start = a;
      Addr aligned_start = cacheline_ROUNDUP(a);
      Addr after_start = cacheline_ROUNDDN(a + len);
      UWord before_len = aligned_start - before_start;
      UWord aligned_len = after_start - aligned_start;
      UWord after_len = a + len - after_start;
      tl_assert(before_start <= aligned_start);
      tl_assert(aligned_start <= after_start);
      tl_assert(before_len < N_LINE_ARANGE);
      tl_assert(after_len < N_LINE_ARANGE);
      tl_assert(get_cacheline_offset(aligned_start) == 0);
      if (get_cacheline_offset(a) == 0) {
         tl_assert(before_len == 0);
         tl_assert(a == aligned_start);
      }
      if (get_cacheline_offset(a+len) == 0) {
         tl_assert(after_len == 0);
         tl_assert(after_start == a+len);
      }
      if (before_len > 0) {
         zsm_sset_range_SMALL( before_start, before_len, svNew );
      }
      if (after_len > 0) {
         zsm_sset_range_SMALL( after_start, after_len, svNew );
      }
      stats__cache_make_New_inZrep += (ULong)aligned_len;

      while (1) {
         Addr tag;
         UWord wix;
         if (aligned_start >= after_start)
            break;
         tl_assert(get_cacheline_offset(aligned_start) == 0);
         tag = aligned_start & ~(N_LINE_ARANGE - 1);
         wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
         if (tag == cache_shmem.tags0[wix]) {
            UWord i;
            for (i = 0; i < N_LINE_ARANGE / 8; i++)
               zsm_swrite64( aligned_start + i * 8, svNew );
         } else {
            UWord i;
            Word zix;
            SecMap* sm;
            LineZ* lineZ;
            /* This line is not in the cache.  Do not force it in;
               instead modify it in-place. */
            /* find the Z line to write in and rcdec it or the
               associated F line. */
            find_Z_for_writing( &sm, &zix, tag );
            tl_assert(sm);
            tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
            lineZ = &sm->linesZ[zix];
            lineZ->dict[0] = svNew;
            lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
            for (i = 0; i < N_LINE_ARANGE/4; i++)
               lineZ->ix2s[i] = 0; /* all refer to dict[0] */
            rcinc_LineZ(lineZ);
         }
         aligned_start += N_LINE_ARANGE;
         aligned_len -= N_LINE_ARANGE;
      }
      tl_assert(aligned_start == after_start);
      tl_assert(aligned_len == 0);
   }
}


/////////////////////////////////////////////////////////
//                                                     //
//              Front-filtering accesses               //
//                                                     //
/////////////////////////////////////////////////////////

static UWord stats__f_ac = 0;
static UWord stats__f_sk = 0;

#if 0
# define STATS__F_SHOW \
   do { \
      if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
         VG_(printf)("filters: ac %lu sk %lu\n", \
                     stats__f_ac, stats__f_sk); \
   } while (0)
#else
# define STATS__F_SHOW /* */
#endif
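
/* Each wrapper below first asks the per-thread filter whether this
   access is redundant with respect to accesses the thread made very
   recently; only if not does it fall through to the (much more
   expensive) core state machine.  The shared shape, shown for the
   32-bit write case (a sketch of the control flow, not new
   functionality):

      if (Filter__ok_to_skip_cwr32(thr->filter, a)) return; // filtered
      zsm_sapply32__msmcwrite(thr, a);                      // full MSM
*/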

void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcwrite(thr, a);
}

void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcwrite(thr, a);
}

void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcwrite(thr, a);
}

void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcwrite(thr, a);
}

void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcwrite( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcwrite( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcwrite( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcwrite( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcwrite( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcwrite( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply08__msmcread(thr, a);
}

void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply16__msmcread(thr, a);
}

void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply32__msmcread(thr, a);
}

void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   stats__f_ac++;
   STATS__F_SHOW;
   if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
      stats__f_sk++;
      return;
   }
   zsm_sapply64__msmcread(thr, a);
}

void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
{
   /* fast track a couple of common cases */
   if (len == 4 && aligned32(a)) {
      zsm_sapply32_f__msmcread( thr, a );
      return;
   }
   if (len == 8 && aligned64(a)) {
      zsm_sapply64_f__msmcread( thr, a );
      return;
   }

   /* be completely general (but as efficient as possible) */
   if (len == 0) return;

   if (!aligned16(a) && len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      a += 1;
      len -= 1;
      tl_assert(aligned16(a));
   }
   if (len == 0) return;

   if (!aligned32(a) && len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
      tl_assert(aligned32(a));
   }
   if (len == 0) return;

   if (!aligned64(a) && len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 8) {
      tl_assert(aligned64(a));
      while (len >= 8) {
         zsm_sapply64_f__msmcread( thr, a );
         a += 8;
         len -= 8;
      }
      tl_assert(aligned64(a));
   }
   if (len == 0) return;

   if (len >= 4)
      tl_assert(aligned32(a));
   if (len >= 4) {
      zsm_sapply32_f__msmcread( thr, a );
      a += 4;
      len -= 4;
   }
   if (len == 0) return;

   if (len >= 2)
      tl_assert(aligned16(a));
   if (len >= 2) {
      zsm_sapply16_f__msmcread( thr, a );
      a += 2;
      len -= 2;
   }
   if (len == 0) return;

   if (len >= 1) {
      zsm_sapply08_f__msmcread( thr, a );
      //a += 1;
      len -= 1;
   }
   tl_assert(len == 0);
}

void libhb_Thr_resumes ( Thr* thr )
{
   if (0) VG_(printf)("resume %p\n", thr);
   Filter__clear(thr->filter, "libhb_Thr_resumes");
   /* A kludge, but .. if this thread doesn't have any marker stacks
      at all, get one right now.  This is easier than figuring out
      exactly when at thread startup we can and can't take a stack
      snapshot. */
   tl_assert(thr->local_Kws_n_stacks);
   if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
      note_local_Kw_n_stack_for(thr);
}


/////////////////////////////////////////////////////////
//                                                     //
//              Synchronisation objects                //
//                                                     //
/////////////////////////////////////////////////////////

// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U

struct _SO {
   VtsID viR; /* r-clock of sender */
   VtsID viW; /* w-clock of sender */
   UInt magic;
};

static SO* SO__Alloc ( void ) {
   SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   so->viR = VtsID_INVALID;
   so->viW = VtsID_INVALID;
   so->magic = SO_MAGIC;
   return so;
}
static void SO__Dealloc ( SO* so ) {
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
   }
   so->magic = 0;
   HG_(free)( so );
}


/////////////////////////////////////////////////////////
//                                                     //
//                  Top Level API                      //
//                                                     //
/////////////////////////////////////////////////////////

static void show_thread_state ( HChar* str, Thr* t )
{
   if (1) return;
   if (t->viR == t->viW) {
      VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)("%s","\n");
   } else {
      VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
      VtsID__pp( t->viR );
      VG_(printf)(" viW %u==", t->viW);
      VtsID__pp( t->viW );
      VG_(printf)("%s","\n");
   }
}


Thr* libhb_init (
        void (*get_stacktrace)( Thr*, Addr*, UWord ),
        ExeContext* (*get_EC)( Thr* )
     )
{
   Thr* thr;
   VtsID vi;
   tl_assert(get_stacktrace);
   tl_assert(get_EC);
   main_get_stacktrace = get_stacktrace;
   main_get_EC = get_EC;

   // No need to initialise hg_wordfm.
   // No need to initialise hg_wordset.

   vts_set_init();
   vts_tab_init();
   event_map_init();
   VtsID__invalidate_caches();

   // initialise shadow memory
   zsm_init( SVal__rcinc, SVal__rcdec );

   thr = Thr__new();
   vi = VtsID__mk_Singleton( thr, 1 );
   thr->viR = vi;
   thr->viW = vi;
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   show_thread_state(" root", thr);
   return thr;
}


Thr* libhb_create ( Thr* parent )
{
   /* The child's VTSs are copies of the parent's VTSs, but ticked at
      the child's index.  Since the child's index is guaranteed
      unique, it has never been seen before, so the implicit value
      before the tick is zero and after that is one. */
   Thr* child = Thr__new();

   child->viR = VtsID__tick( parent->viR, child );
   child->viW = VtsID__tick( parent->viW, child );
   Filter__clear(child->filter, "libhb_create(child)");
   VtsID__rcinc(child->viR);
   VtsID__rcinc(child->viW);
   /* We need to do note_local_Kw_n_stack_for( child ), but it's too
      early for that - it may not have a valid TId yet.  So, let
      libhb_Thr_resumes pick it up the first time the thread runs. */

   tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   tl_assert(VtsID__indexAt( child->viW, child ) == 1);

   /* and the parent has to move along too */
   VtsID__rcdec(parent->viR);
   VtsID__rcdec(parent->viW);
   parent->viR = VtsID__tick( parent->viR, parent );
   parent->viW = VtsID__tick( parent->viW, parent );
   Filter__clear(parent->filter, "libhb_create(parent)");
   VtsID__rcinc(parent->viR);
   VtsID__rcinc(parent->viW);
   note_local_Kw_n_stack_for( parent );

   show_thread_state(" child", child);
   show_thread_state("parent", parent);

   return child;
}
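
/* Worked example (hypothetical clock values): if the parent enters
   with viR = viW = {P:3}, then afterwards the child has
   viR = viW = {P:3, C:1} and the parent has viR = viW = {P:4}.
   Hence everything the parent did before the create happens-before
   everything the child does, but not vice versa. */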

/* Shut down the library, and print stats (in fact that's _all_ this
   is for). */
void libhb_shutdown ( Bool show_stats )
{
   if (show_stats) {
      VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
      VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
                  stats__secmaps_allocd,
                  stats__secmap_ga_space_covered);
      VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesZ_allocd,
                  stats__secmap_linesZ_bytes);
      VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
                  stats__secmap_linesF_allocd,
                  stats__secmap_linesF_bytes);
      VG_(printf)(" secmaps: %'10lu iterator steppings\n",
                  stats__secmap_iterator_steppings);
      VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
                  stats__secmaps_search, stats__secmaps_search_slow);

      VG_(printf)("%s","\n");
      VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
                  stats__cache_totrefs, stats__cache_totmisses );
      VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
                  stats__cache_Z_fetches, stats__cache_F_fetches );
      VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
                  stats__cache_Z_wbacks, stats__cache_F_wbacks );
      VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
                  stats__cache_invals, stats__cache_flushes );
      VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
                  stats__cache_make_New_arange,
                  stats__cache_make_New_inZrep);

      VG_(printf)("%s","\n");
      VG_(printf)(" cline: %'10lu normalises\n",
                  stats__cline_normalises );
      VG_(printf)(" cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cread64s,
                  stats__cline_cread32s,
                  stats__cline_cread16s,
                  stats__cline_cread08s );
      VG_(printf)(" cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_cwrite64s,
                  stats__cline_cwrite32s,
                  stats__cline_cwrite16s,
                  stats__cline_cwrite08s );
      VG_(printf)(" cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
                  stats__cline_swrite64s,
                  stats__cline_swrite32s,
                  stats__cline_swrite16s,
                  stats__cline_swrite08s );
      VG_(printf)(" cline: s rd1s %'lu, s copy1s %'lu\n",
                  stats__cline_sread08s, stats__cline_scopy08s );
      VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32splits,
                  stats__cline_32to16splits,
                  stats__cline_16to8splits );
      VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
                  stats__cline_64to32pulldown,
                  stats__cline_32to16pulldown,
                  stats__cline_16to8pulldown );
      if (0)
         VG_(printf)(" cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
                     (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);

      VG_(printf)("%s","\n");

      VG_(printf)(" libhb: %'13llu msmcread (%'llu changed)\n",
                  stats__msmcread, stats__msmcread_change);
      VG_(printf)(" libhb: %'13llu msmcwrite (%'llu changed)\n",
                  stats__msmcwrite, stats__msmcwrite_change);
      VG_(printf)(" libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
                  stats__cmpLEQ_queries, stats__cmpLEQ_misses);
      VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
                  stats__join2_queries, stats__join2_misses);

      VG_(printf)("%s","\n");
      VG_(printf)(
         " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
         VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
      );
      VG_(printf)( " libhb: %lu entries in vts_set\n",
                   VG_(sizeFM)( vts_set ) );

      VG_(printf)("%s","\n");
      VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
                   stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
                   stats__ctxt_rcdec2,
                   stats__ctxt_rcdec3 );
      VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
                   stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
      VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
                   (UWord)N_RCEC_TAB,
                   stats__ctxt_tab_curr );
      VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
                   stats__ctxt_tab_qs,
                   stats__ctxt_tab_cmps );
#if 0
      VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
      VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
      VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
      VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
      VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
      VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
      VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
      VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
      VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
      VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
      VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
      VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
      VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
      VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));

      VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
      VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
      VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
      VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
#endif

      VG_(printf)("%s","<<< END libhb stats >>>\n");
      VG_(printf)("%s","\n");

   }
}

void libhb_async_exit ( Thr* thr )
{
   tl_assert(thr);
   thr->still_alive = False;
   /* XXX free up Filter and local_Kws_n_stacks */
}

/* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   a Seg that points at a VTS is its one-and-only owner, and ditto for
   a SO that points at a VTS. */

SO* libhb_so_alloc ( void )
{
   return SO__Alloc();
}

void libhb_so_dealloc ( SO* so )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);
   SO__Dealloc(so);
}
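
/* Lifecycle sketch (hypothetical tool-side caller, eg an
   implementation of a posted/waited-upon object):

      SO* so = libhb_so_alloc();         // when the object appears
      libhb_so_send( thrA, so, True );   // A posts (strong send)
      libhb_so_recv( thrB, so, True );   // B acquires A's clocks
      libhb_so_dealloc( so );            // when the object dies

   libhb_so_send and libhb_so_recv are defined below. */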

/* See comments in libhb.h for details on the meaning of
   strong vs weak sends and strong vs weak receives. */
void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
{
   /* Copy the VTSs from 'thr' into the sync object, and then move
      the thread along one step. */

   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   /* stay sane .. a thread's read-clock must always lead or be the
      same as its write-clock */
   { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
     tl_assert(leq);
   }

   /* since we're overwriting the VtsIDs in the SO, we need to drop
      any references made by the previous contents thereof */
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      so->viR = thr->viR;
      so->viW = thr->viW;
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   } else {
      /* In a strong send, we dump any previous VC in the SO and
         install the sending thread's VC instead.  For a weak send we
         must join2 with what's already there. */
      tl_assert(so->viW != VtsID_INVALID);
      VtsID__rcdec(so->viR);
      VtsID__rcdec(so->viW);
      so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
      so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
      VtsID__rcinc(so->viR);
      VtsID__rcinc(so->viW);
   }

   /* move both parent clocks along */
   VtsID__rcdec(thr->viR);
   VtsID__rcdec(thr->viW);
   thr->viR = VtsID__tick( thr->viR, thr );
   thr->viW = VtsID__tick( thr->viW, thr );
   Filter__clear(thr->filter, "libhb_so_send");
   if (thr->still_alive)
      note_local_Kw_n_stack_for(thr);
   VtsID__rcinc(thr->viR);
   VtsID__rcinc(thr->viW);

   if (strong_send)
      show_thread_state("s-send", thr);
   else
      show_thread_state("w-send", thr);
}
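
/* Worked example (hypothetical clocks): suppose the sender A has
   viW = {A:5, B:2} and the SO already holds viW = {A:3, B:7}.  A
   strong send overwrites the SO's clock, giving {A:5, B:2}; a weak
   send join2s with it, giving {A:5, B:7}.  Either way A's own clocks
   then tick, to viW = {A:6, B:2}. */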

void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
{
   tl_assert(so);
   tl_assert(so->magic == SO_MAGIC);

   if (so->viR != VtsID_INVALID) {
      tl_assert(so->viW != VtsID_INVALID);

      /* Weak receive (basically, an R-acquisition of a R-W lock).
         This advances the read-clock of the receiver, but not the
         write-clock. */
      VtsID__rcdec(thr->viR);
      thr->viR = VtsID__join2( thr->viR, so->viR );
      VtsID__rcinc(thr->viR);

      /* At one point (r10589) it seemed safest to tick the clocks for
         the receiving thread after the join.  But on reflection, I
         wonder if that might cause it to 'overtake' constraints,
         which could lead to missing races.  So, back out that part of
         r10589. */
      //VtsID__rcdec(thr->viR);
      //thr->viR = VtsID__tick( thr->viR, thr );
      //VtsID__rcinc(thr->viR);

      /* For a strong receive, we also advance the receiver's write
         clock, which means the receive as a whole is essentially
         equivalent to a W-acquisition of a R-W lock. */
      if (strong_recv) {
         VtsID__rcdec(thr->viW);
         thr->viW = VtsID__join2( thr->viW, so->viW );
         VtsID__rcinc(thr->viW);

         /* See comment just above, re r10589. */
         //VtsID__rcdec(thr->viW);
         //thr->viW = VtsID__tick( thr->viW, thr );
         //VtsID__rcinc(thr->viW);
      }

      Filter__clear(thr->filter, "libhb_so_recv");
      note_local_Kw_n_stack_for(thr);

      if (strong_recv)
         show_thread_state("s-recv", thr);
      else
         show_thread_state("w-recv", thr);

   } else {
      tl_assert(so->viW == VtsID_INVALID);
      /* Deal with degenerate case: 'so' has no vts, so there has been
         no message posted to it.  Just ignore this case. */
      show_thread_state("d-recv", thr);
   }
}
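
/* Worked example (hypothetical clocks): receiver R starts with
   viR = viW = {R:4} and the SO holds viR = viW = {A:5}.  A weak
   receive leaves R with viR = {A:5, R:4} and viW unchanged; a strong
   receive joins both, giving viR = viW = {A:5, R:4}.  So a strong
   receive behaves like a W-acquisition of a R-W lock, and a weak one
   like an R-acquisition, as the comments above note. */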

Bool libhb_so_everSent ( SO* so )
{
   if (so->viR == VtsID_INVALID) {
      tl_assert(so->viW == VtsID_INVALID);
      return False;
   } else {
      tl_assert(so->viW != VtsID_INVALID);
      return True;
   }
}

#define XXX1 0 // 0x67a106c
#define XXX2 0

static inline Bool TRACEME(Addr a, SizeT szB) {
   if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   return False;
}
static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
   SVal sv = zsm_sread08(a);
   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr, a, szB, s, sv);
   show_thread_state("", thr);
   VG_(printf)("%s","\n");
}

void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
{
   SVal sv = SVal__mkC(thr->viW, thr->viW);
   tl_assert(is_sane_SVal_C(sv));
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   zsm_sset_range( a, szB, sv );
   Filter__clear_range( thr->filter, a, szB );
   if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
}

void libhb_srange_noaccess ( Thr* thr, Addr a, SizeT szB )
{
   /* do nothing */
}

void* libhb_get_Thr_opaque ( Thr* thr ) {
   tl_assert(thr);
   return thr->opaque;
}

void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
   tl_assert(thr);
   thr->opaque = v;
}

void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
{
   zsm_scopy_range(src, dst, len);
   Filter__clear_range( thr->filter, dst, len );
}

void libhb_maybe_GC ( void )
{
   event_map_maybe_GC();
   /* If there are still freelist entries available, no need for a
      GC. */
   if (vts_tab_freelist != VtsID_INVALID)
      return;
   /* So all the table entries are full, and we're having to expand
      the table.  But did we hit the threshold point yet? */
   if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
      return;
   vts_tab__do_GC( False/*don't show stats*/ );
}


/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
//                                                             //
//                 SECTION END main library                    //
//                                                             //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////

/*--------------------------------------------------------------------*/
/*--- end                                             libhb_main.c ---*/
/*--------------------------------------------------------------------*/