 1
2/*--------------------------------------------------------------------*/
3/*--- LibHB: a library for implementing and checking ---*/
4/*--- the happens-before relationship in concurrent programs. ---*/
5/*--- libhb_main.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9 This file is part of LibHB, a library for implementing and checking
10 the happens-before relationship in concurrent programs.
11
12 Copyright (C) 2008-2008 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcbase.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_mallocfree.h"
38#include "pub_tool_wordfm.h"
39#include "pub_tool_xarray.h"
40#include "pub_tool_oset.h"
41#include "pub_tool_threadstate.h"
42#include "pub_tool_aspacemgr.h"
43#include "pub_tool_execontext.h"
44#include "pub_tool_errormgr.h"
 45#include "pub_tool_options.h" // VG_(clo_verbosity)
 46#include "hg_basics.h"
47#include "hg_wordset.h"
48#include "hg_lock_n_thread.h"
49#include "hg_errors.h"
50
51#include "libhb.h"
52
53
 54/* Globals needed by other parts of the library. These are set
 55 once at startup and then never changed. */
 56
57static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
 58static ExeContext* (*main_get_EC)( Thr* ) = NULL;
 59
60/////////////////////////////////////////////////////////////////
61/////////////////////////////////////////////////////////////////
62// //
63// //
64// //
65/////////////////////////////////////////////////////////////////
66/////////////////////////////////////////////////////////////////
67
68
69/////////////////////////////////////////////////////////////////
70/////////////////////////////////////////////////////////////////
71// //
72// SECTION BEGIN compressed shadow memory //
73// //
74/////////////////////////////////////////////////////////////////
75/////////////////////////////////////////////////////////////////
76
77#ifndef __HB_ZSM_H
78#define __HB_ZSM_H
79
80typedef ULong SVal;
81
82/* This value has special significance to the implementation, and callers
83 may not store it in the shadow memory. */
84#define SVal_INVALID (3ULL << 62)
85
86/* This is the default value for shadow memory. Initially the shadow
87 memory contains no accessible areas and so all reads produce this
 88 value. TODO: make this caller-definable. */
89#define SVal_NOACCESS (2ULL << 62)
90
91/* Initialise the library. Once initialised, it will (or may) call
92 rcinc and rcdec in response to all the calls below, in order to
93 allow the user to do reference counting on the SVals stored herein.
94 It is important to understand, however, that due to internal
95 caching, the reference counts are in general inaccurate, and can be
96 both above or below the true reference count for an item. In
97 particular, the library may indicate that the reference count for
98 an item is zero, when in fact it is not.
99
100 To make the reference counting exact and therefore non-pointless,
101 call zsm_flush_cache. Immediately after it returns, the reference
102 counts for all items, as deduced by the caller by observing calls
103 to rcinc and rcdec, will be correct, and so any items with a zero
104 reference count may be freed (or at least considered to be
105 unreferenced by this library).
106*/
107static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
108
109static void zsm_set_range ( Addr, SizeT, SVal );
110static SVal zsm_read8 ( Addr );
111static void zsm_copy_range ( Addr, Addr, SizeT );
112static void zsm_flush_cache ( void );
113
114#endif /* ! __HB_ZSM_H */
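/* Usage sketch (hypothetical caller; not part of this file). The
   callbacks passed to zsm_init just need type void(*)(SVal). A
   client that merely wants a running count of references held by the
   library could, for example, do:

      static Long total_refs = 0;
      static void my_rcinc ( SVal s ) { total_refs++; }
      static void my_rcdec ( SVal s ) { total_refs--; }

      // at startup:
      //    zsm_init( my_rcinc, my_rcdec );
      // before relying on the count (see comment above zsm_init):
      //    zsm_flush_cache();

   'total_refs', 'my_rcinc' and 'my_rcdec' are illustrative names
   only; the real callbacks used by Helgrind live elsewhere. */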
115
116
117/* For the shadow mem cache stuff we may want more intrusive
118 checks. Unfortunately there's no almost-zero-cost way to make them
119 selectable at run time. Hence set the #if 0 to #if 1 and
120 rebuild if you want them. */
121#if 0
122# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
123# define inline __attribute__((noinline))
124 /* probably want to ditch -fomit-frame-pointer too */
125#else
126# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
127#endif
128
129/* For the SegmentID, SegmentSet and SVal stuff we may want more
130 intrusive checks. Again there's no zero cost way to do this. Set
131 the #if 0 to #if 1 and rebuild if you want them. */
132#if 0
133# define SCE_SVALS 1 /* sanity-check shadow value stuff */
134#else
135# define SCE_SVALS 0
136#endif
137
138
139/* Round a up to the next multiple of N. N must be a power of 2 */
 140#define ROUNDUP(a, N) (((a) + (N) - 1) & ~((N)-1))
141/* Round a down to the next multiple of N. N must be a power of 2 */
142#define ROUNDDN(a, N) ((a) & ~(N-1))
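/* Worked examples, assuming N == 8: ROUNDUP(13,8) == 16,
   ROUNDUP(16,8) == 16, ROUNDDN(13,8) == 8, ROUNDDN(16,8) == 16. */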
143
144
145
146/* ------ User-supplied RC functions ------ */
147static void(*rcinc)(SVal) = NULL;
148static void(*rcdec)(SVal) = NULL;
149
150
151/* ------ CacheLine ------ */
152
153#define N_LINE_BITS 6 /* must be >= 3 */
154#define N_LINE_ARANGE (1 << N_LINE_BITS)
155#define N_LINE_TREES (N_LINE_ARANGE >> 3)
156
157typedef
158 struct {
159 UShort descrs[N_LINE_TREES];
160 SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
161 }
162 CacheLine;
163
164#define TREE_DESCR_16_0 (1<<0)
165#define TREE_DESCR_32_0 (1<<1)
166#define TREE_DESCR_16_1 (1<<2)
167#define TREE_DESCR_64 (1<<3)
168#define TREE_DESCR_16_2 (1<<4)
169#define TREE_DESCR_32_1 (1<<5)
170#define TREE_DESCR_16_3 (1<<6)
171#define TREE_DESCR_8_0 (1<<7)
172#define TREE_DESCR_8_1 (1<<8)
173#define TREE_DESCR_8_2 (1<<9)
174#define TREE_DESCR_8_3 (1<<10)
175#define TREE_DESCR_8_4 (1<<11)
176#define TREE_DESCR_8_5 (1<<12)
177#define TREE_DESCR_8_6 (1<<13)
178#define TREE_DESCR_8_7 (1<<14)
179#define TREE_DESCR_DTY (1<<15)
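/* Worked example: a tree (8 bytes of address range) holding one
   32-bit value over bytes 0..3, one 16-bit value over bytes 4..5 and
   two 8-bit values at bytes 6 and 7 has live leaves at tree[0],
   tree[4], tree[6] and tree[7], and its descriptor is
   TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7. */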
180
181typedef
182 struct {
183 SVal dict[4]; /* can represent up to 4 diff values in the line */
184 UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
185 dict indexes */
186 /* if dict[0] == SVal_INVALID then dict[1] is the index of the
187 LineF to use, and dict[2..] are also SVal_INVALID. */
188 }
189 LineZ; /* compressed rep for a cache line */
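/* Decoding sketch, for a line held directly (i.e. dict[0] !=
   SVal_INVALID): byte i of the line has shadow value
   dict[ read_twobit_array(ix2s, i) ] (see read_twobit_array below).
   For example, a line carrying only two distinct SVals A and B, with
   B in bytes 8..15 and A everywhere else, is stored as
   dict = { A, B, SVal_INVALID, SVal_INVALID }, with ix2s holding the
   2-bit index 1 for positions 8..15 and 0 for all other positions. */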
190
191typedef
192 struct {
193 Bool inUse;
194 SVal w64s[N_LINE_ARANGE];
195 }
196 LineF; /* full rep for a cache line */
197
198/* Shadow memory.
199 Primary map is a WordFM Addr SecMap*.
200 SecMaps cover some page-size-ish section of address space and hold
201 a compressed representation.
202 CacheLine-sized chunks of SecMaps are copied into a Cache, being
203 decompressed when moved into the cache and recompressed on the
204 way out. Because of this, the cache must operate as a writeback
205 cache, not a writethrough one.
206
207 Each SecMap must hold a power-of-2 number of CacheLines. Hence
 208 N_SECMAP_BITS must be >= N_LINE_BITS.
209*/
210#define N_SECMAP_BITS 13
211#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
212
213// # CacheLines held by a SecMap
214#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
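/* With the constants above: N_LINE_ARANGE == 64 bytes per cache
   line, N_SECMAP_ARANGE == 8192 bytes per SecMap, and hence
   N_SECMAP_ZLINES == 8192/64 == 128 LineZs per SecMap. */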
215
216/* The data in the SecMap is held in the array of LineZs. Each LineZ
217 either carries the required data directly, in a compressed
 218 representation, or it holds (in .dict[1]) an index to the LineF in
219 .linesF that holds the full representation.
220
221 Currently-unused LineF's have their .inUse bit set to zero.
 222 Since each in-use LineF is referred to by exactly one LineZ,
223 the number of .linesZ[] that refer to .linesF should equal
224 the number of .linesF[] that have .inUse == True.
225
226 RC obligations: the RCs presented to the user include exactly
227 the values in:
228 * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
229 * F reps that are in use (.inUse == True)
230
231 Hence the following actions at the following transitions are required:
232
233 F rep: .inUse==True -> .inUse==False -- rcdec_LineF
234 F rep: .inUse==False -> .inUse==True -- rcinc_LineF
235 Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
236 Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
237*/
238typedef
239 struct {
240 UInt magic;
241 LineZ linesZ[N_SECMAP_ZLINES];
242 LineF* linesF;
243 UInt linesF_size;
244 }
245 SecMap;
246
247#define SecMap_MAGIC 0x571e58cbU
248
249static inline Bool is_sane_SecMap ( SecMap* sm ) {
250 return sm != NULL && sm->magic == SecMap_MAGIC;
251}
252
253/* ------ Cache ------ */
254
255#define N_WAY_BITS 16
256#define N_WAY_NENT (1 << N_WAY_BITS)
257
258/* Each tag is the address of the associated CacheLine, rounded down
259 to a CacheLine address boundary. A CacheLine size must be a power
260 of 2 and must be 8 or more. Hence an easy way to initialise the
261 cache so it is empty is to set all the tag values to any value % 8
 262 != 0, e.g. 1. This means all queries in the cache initially miss.
 263 It does, however, require us to detect, and not write back, any
 264 line with a bogus tag. */
265typedef
266 struct {
267 CacheLine lyns0[N_WAY_NENT];
268 Addr tags0[N_WAY_NENT];
269 }
270 Cache;
271
272static inline Bool is_valid_scache_tag ( Addr tag ) {
273 /* a valid tag should be naturally aligned to the start of
274 a CacheLine. */
275 return 0 == (tag & (N_LINE_ARANGE - 1));
276}
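/* Lookup sketch: the cache is direct-mapped. For an address 'a' the
   tag is 'a' with the low N_LINE_BITS cleared, and the way index is
   (a >> N_LINE_BITS) & (N_WAY_NENT - 1), i.e. bits [21:6] of 'a'
   with the sizes above. See get_cacheline below. */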
277
278
279/* --------- Primary data structures --------- */
280
281/* Shadow memory primary map */
282static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
283static Cache cache_shmem;
284
285
286static UWord stats__secmaps_search = 0; // # SM finds
287static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
288static UWord stats__secmaps_allocd = 0; // # SecMaps issued
289static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
290static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
291static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
292static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
293static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
294static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
295static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
296static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
297static UWord stats__cache_F_fetches = 0; // # F lines fetched
298static UWord stats__cache_F_wbacks = 0; // # F lines written back
299static UWord stats__cache_invals = 0; // # cache invals
300static UWord stats__cache_flushes = 0; // # cache flushes
301static UWord stats__cache_totrefs = 0; // # total accesses
302static UWord stats__cache_totmisses = 0; // # misses
303static ULong stats__cache_make_New_arange = 0; // total arange made New
304static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
305static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
306static UWord stats__cline_read64s = 0; // # calls to s_m_read64
307static UWord stats__cline_read32s = 0; // # calls to s_m_read32
308static UWord stats__cline_read16s = 0; // # calls to s_m_read16
309static UWord stats__cline_read8s = 0; // # calls to s_m_read8
310static UWord stats__cline_write64s = 0; // # calls to s_m_write64
311static UWord stats__cline_write32s = 0; // # calls to s_m_write32
312static UWord stats__cline_write16s = 0; // # calls to s_m_write16
313static UWord stats__cline_write8s = 0; // # calls to s_m_write8
314static UWord stats__cline_set64s = 0; // # calls to s_m_set64
315static UWord stats__cline_set32s = 0; // # calls to s_m_set32
316static UWord stats__cline_set16s = 0; // # calls to s_m_set16
317static UWord stats__cline_set8s = 0; // # calls to s_m_set8
318static UWord stats__cline_get8s = 0; // # calls to s_m_get8
319static UWord stats__cline_copy8s = 0; // # calls to s_m_copy8
320static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
321static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
322static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
323static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
324static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
325static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
326
327static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
328 return a & ~(N_SECMAP_ARANGE - 1);
329}
330static inline UWord shmem__get_SecMap_offset ( Addr a ) {
331 return a & (N_SECMAP_ARANGE - 1);
332}
333
334
335/*----------------------------------------------------------------*/
336/*--- map_shmem :: WordFM Addr SecMap ---*/
337/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
338/*----------------------------------------------------------------*/
339
340/*--------------- SecMap allocation --------------- */
341
342static HChar* shmem__bigchunk_next = NULL;
343static HChar* shmem__bigchunk_end1 = NULL;
344
345static void* shmem__bigchunk_alloc ( SizeT n )
346{
347 const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
348 tl_assert(n > 0);
349 n = VG_ROUNDUP(n, 16);
350 tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
351 tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
352 <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
353 if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
354 if (0)
355 VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
356 (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
357 shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
358 if (shmem__bigchunk_next == NULL)
359 VG_(out_of_memory_NORETURN)(
360 "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
361 shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
362 }
363 tl_assert(shmem__bigchunk_next);
364 tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
365 tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
366 shmem__bigchunk_next += n;
367 return shmem__bigchunk_next - n;
368}
369
370static SecMap* shmem__alloc_SecMap ( void )
371{
372 Word i, j;
373 SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
374 if (0) VG_(printf)("alloc_SecMap %p\n",sm);
375 tl_assert(sm);
376 sm->magic = SecMap_MAGIC;
377 for (i = 0; i < N_SECMAP_ZLINES; i++) {
378 sm->linesZ[i].dict[0] = SVal_NOACCESS;
379 sm->linesZ[i].dict[1] = SVal_INVALID;
380 sm->linesZ[i].dict[2] = SVal_INVALID;
381 sm->linesZ[i].dict[3] = SVal_INVALID;
382 for (j = 0; j < N_LINE_ARANGE/4; j++)
383 sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
384 }
385 sm->linesF = NULL;
386 sm->linesF_size = 0;
387 stats__secmaps_allocd++;
388 stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
389 stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
390 stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
391 return sm;
392}
393
394typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
395static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
396
397static SecMap* shmem__find_SecMap ( Addr ga )
398{
399 SecMap* sm = NULL;
400 Addr gaKey = shmem__round_to_SecMap_base(ga);
401 // Cache
402 stats__secmaps_search++;
403 if (LIKELY(gaKey == smCache[0].gaKey))
404 return smCache[0].sm;
405 if (LIKELY(gaKey == smCache[1].gaKey)) {
406 SMCacheEnt tmp = smCache[0];
407 smCache[0] = smCache[1];
408 smCache[1] = tmp;
409 return smCache[0].sm;
410 }
411 if (gaKey == smCache[2].gaKey) {
412 SMCacheEnt tmp = smCache[1];
413 smCache[1] = smCache[2];
414 smCache[2] = tmp;
415 return smCache[1].sm;
416 }
417 // end Cache
418 stats__secmaps_search_slow++;
419 if (VG_(lookupFM)( map_shmem,
420 NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
421 tl_assert(sm != NULL);
422 smCache[2] = smCache[1];
423 smCache[1] = smCache[0];
424 smCache[0].gaKey = gaKey;
425 smCache[0].sm = sm;
426 } else {
427 tl_assert(sm == NULL);
428 }
429 return sm;
430}
431
432static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
433{
434 SecMap* sm = shmem__find_SecMap ( ga );
435 if (LIKELY(sm)) {
436 return sm;
437 } else {
438 /* create a new one */
439 Addr gaKey = shmem__round_to_SecMap_base(ga);
440 sm = shmem__alloc_SecMap();
441 tl_assert(sm);
442 VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
443 return sm;
444 }
445}
446
447
448/* ------------ LineF and LineZ related ------------ */
449
450static void rcinc_LineF ( LineF* lineF ) {
451 UWord i;
452 tl_assert(lineF->inUse);
453 for (i = 0; i < N_LINE_ARANGE; i++)
454 rcinc(lineF->w64s[i]);
455}
456
457static void rcdec_LineF ( LineF* lineF ) {
458 UWord i;
459 tl_assert(lineF->inUse);
460 for (i = 0; i < N_LINE_ARANGE; i++)
461 rcdec(lineF->w64s[i]);
462}
463
464static void rcinc_LineZ ( LineZ* lineZ ) {
465 tl_assert(lineZ->dict[0] != SVal_INVALID);
466 rcinc(lineZ->dict[0]);
467 if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
468 if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
469 if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
470}
471
472static void rcdec_LineZ ( LineZ* lineZ ) {
473 tl_assert(lineZ->dict[0] != SVal_INVALID);
474 rcdec(lineZ->dict[0]);
475 if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
476 if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
477 if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
478}
479
480inline
481static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
482 Word bix, shft, mask, prep;
483 tl_assert(ix >= 0);
484 bix = ix >> 2;
485 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
486 mask = 3 << shft;
487 prep = b2 << shft;
488 arr[bix] = (arr[bix] & ~mask) | prep;
489}
490
491inline
492static UWord read_twobit_array ( UChar* arr, UWord ix ) {
493 Word bix, shft;
494 tl_assert(ix >= 0);
495 bix = ix >> 2;
496 shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
497 return (arr[bix] >> shft) & 3;
498}
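/* Worked example: ix == 13 gives bix == 3 and shft == 2, so the
   2-bit entry for position 13 occupies bits [3:2] of arr[3]. */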
499
500/* Given address 'tag', find either the Z or F line containing relevant
501 data, so it can be read into the cache.
502*/
503static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
504 /*OUT*/LineF** fp, Addr tag ) {
505 LineZ* lineZ;
506 LineF* lineF;
507 UWord zix;
508 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
509 UWord smoff = shmem__get_SecMap_offset(tag);
510 /* since smoff is derived from a valid tag, it should be
511 cacheline-aligned. */
512 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
513 zix = smoff >> N_LINE_BITS;
514 tl_assert(zix < N_SECMAP_ZLINES);
515 lineZ = &sm->linesZ[zix];
516 lineF = NULL;
517 if (lineZ->dict[0] == SVal_INVALID) {
518 UInt fix = (UInt)lineZ->dict[1];
519 tl_assert(sm->linesF);
520 tl_assert(sm->linesF_size > 0);
521 tl_assert(fix >= 0 && fix < sm->linesF_size);
522 lineF = &sm->linesF[fix];
523 tl_assert(lineF->inUse);
524 lineZ = NULL;
525 }
526 *zp = lineZ;
527 *fp = lineF;
528}
529
530/* Given address 'tag', return the relevant SecMap and the index of
531 the LineZ within it, in the expectation that the line is to be
532 overwritten. Regardless of whether 'tag' is currently associated
 533 with a Z or F representation, this does an rcdec on the current
534 representation, in recognition of the fact that the contents are
535 just about to be overwritten. */
536static __attribute__((noinline))
537void find_Z_for_writing ( /*OUT*/SecMap** smp,
538 /*OUT*/Word* zixp,
539 Addr tag ) {
540 LineZ* lineZ;
541 LineF* lineF;
542 UWord zix;
543 SecMap* sm = shmem__find_or_alloc_SecMap(tag);
544 UWord smoff = shmem__get_SecMap_offset(tag);
545 /* since smoff is derived from a valid tag, it should be
546 cacheline-aligned. */
547 tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
548 zix = smoff >> N_LINE_BITS;
549 tl_assert(zix < N_SECMAP_ZLINES);
550 lineZ = &sm->linesZ[zix];
551 lineF = NULL;
552 /* re RCs, we are freeing up this LineZ/LineF so that new data can
553 be parked in it. Hence have to rcdec it accordingly. */
554 /* If lineZ has an associated lineF, free it up. */
555 if (lineZ->dict[0] == SVal_INVALID) {
556 UInt fix = (UInt)lineZ->dict[1];
557 tl_assert(sm->linesF);
558 tl_assert(sm->linesF_size > 0);
559 tl_assert(fix >= 0 && fix < sm->linesF_size);
560 lineF = &sm->linesF[fix];
561 tl_assert(lineF->inUse);
562 rcdec_LineF(lineF);
563 lineF->inUse = False;
564 } else {
565 rcdec_LineZ(lineZ);
566 }
567 *smp = sm;
568 *zixp = zix;
569}
570
571static __attribute__((noinline))
572void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
573 UInt i, new_size;
574 LineF* nyu;
575
576 if (sm->linesF) {
577 tl_assert(sm->linesF_size > 0);
578 } else {
579 tl_assert(sm->linesF_size == 0);
580 }
581
582 if (sm->linesF) {
583 for (i = 0; i < sm->linesF_size; i++) {
584 if (!sm->linesF[i].inUse) {
585 *fixp = (Word)i;
586 return;
587 }
588 }
589 }
590
591 /* No free F line found. Expand existing array and try again. */
592 new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
593 nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
594 new_size * sizeof(LineF) );
595 tl_assert(nyu);
596
597 stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
598 stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
599 * sizeof(LineF);
600
601 if (0)
602 VG_(printf)("SM %p: expand F array from %d to %d\n",
603 sm, (Int)sm->linesF_size, new_size);
604
605 for (i = 0; i < new_size; i++)
606 nyu[i].inUse = False;
607
608 if (sm->linesF) {
609 for (i = 0; i < sm->linesF_size; i++) {
610 tl_assert(sm->linesF[i].inUse);
611 nyu[i] = sm->linesF[i];
612 }
613 VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
614 HG_(free)(sm->linesF);
615 }
616
617 sm->linesF = nyu;
618 sm->linesF_size = new_size;
619
620 for (i = 0; i < sm->linesF_size; i++) {
621 if (!sm->linesF[i].inUse) {
622 *fixp = (Word)i;
623 return;
624 }
625 }
626
627 /*NOTREACHED*/
628 tl_assert(0);
629}
630
631
632/* ------------ CacheLine and implicit-tree related ------------ */
633
634__attribute__((unused))
635static void pp_CacheLine ( CacheLine* cl ) {
636 Word i;
637 if (!cl) {
638 VG_(printf)("%s","pp_CacheLine(NULL)\n");
639 return;
640 }
641 for (i = 0; i < N_LINE_TREES; i++)
642 VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
643 for (i = 0; i < N_LINE_ARANGE; i++)
644 VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
645}
646
647static UChar descr_to_validbits ( UShort descr )
648{
649 /* a.k.a Party Time for gcc's constant folder */
650# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
651 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
652 ( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
653 ( (b8_5) << 12) | ( (b8_4) << 11) | \
654 ( (b8_3) << 10) | ( (b8_2) << 9) | \
655 ( (b8_1) << 8) | ( (b8_0) << 7) | \
656 ( (b16_3) << 6) | ( (b32_1) << 5) | \
657 ( (b16_2) << 4) | ( (b64) << 3) | \
658 ( (b16_1) << 2) | ( (b32_0) << 1) | \
659 ( (b16_0) << 0) ) )
660
661# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
662 ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
663 ( (bit5) << 5) | ( (bit4) << 4) | \
664 ( (bit3) << 3) | ( (bit2) << 2) | \
665 ( (bit1) << 1) | ( (bit0) << 0) ) )
666
667 /* these should all get folded out at compile time */
668 tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
669 tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
670 tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
671 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
672 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
673 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
674 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
675 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
676 tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
677
678 switch (descr) {
679 /*
680 +--------------------------------- TREE_DESCR_8_7
681 | +------------------- TREE_DESCR_8_0
682 | | +---------------- TREE_DESCR_16_3
683 | | | +-------------- TREE_DESCR_32_1
684 | | | | +------------ TREE_DESCR_16_2
685 | | | | | +--------- TREE_DESCR_64
686 | | | | | | +------ TREE_DESCR_16_1
687 | | | | | | | +---- TREE_DESCR_32_0
688 | | | | | | | | +-- TREE_DESCR_16_0
689 | | | | | | | | |
690 | | | | | | | | | GRANULARITY, 7 -> 0 */
691 case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
692 return BYTE(1,1,1,1,1,1,1,1);
693 case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
694 return BYTE(1,1,0,1,1,1,1,1);
695 case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
696 return BYTE(0,1,1,1,1,1,1,1);
697 case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
698 return BYTE(0,1,0,1,1,1,1,1);
699
700 case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
701 return BYTE(1,1,1,1,1,1,0,1);
702 case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
703 return BYTE(1,1,0,1,1,1,0,1);
704 case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
705 return BYTE(0,1,1,1,1,1,0,1);
706 case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
707 return BYTE(0,1,0,1,1,1,0,1);
708
709 case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
710 return BYTE(1,1,1,1,0,1,1,1);
711 case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
712 return BYTE(1,1,0,1,0,1,1,1);
713 case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
714 return BYTE(0,1,1,1,0,1,1,1);
715 case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
716 return BYTE(0,1,0,1,0,1,1,1);
717
718 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
719 return BYTE(1,1,1,1,0,1,0,1);
720 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
721 return BYTE(1,1,0,1,0,1,0,1);
722 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
723 return BYTE(0,1,1,1,0,1,0,1);
724 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
725 return BYTE(0,1,0,1,0,1,0,1);
726
727 case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
728 return BYTE(0,0,0,1,1,1,1,1);
729 case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
730 return BYTE(0,0,0,1,1,1,0,1);
731 case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
732 return BYTE(0,0,0,1,0,1,1,1);
733 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
734 return BYTE(0,0,0,1,0,1,0,1);
735
736 case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
737 return BYTE(1,1,1,1,0,0,0,1);
738 case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
739 return BYTE(1,1,0,1,0,0,0,1);
740 case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
741 return BYTE(0,1,1,1,0,0,0,1);
742 case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
743 return BYTE(0,1,0,1,0,0,0,1);
744
745 case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
746 return BYTE(0,0,0,1,0,0,0,1);
747
748 case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
749 return BYTE(0,0,0,0,0,0,0,1);
750
751 default: return BYTE(0,0,0,0,0,0,0,0);
752 /* INVALID - any valid descr produces at least one
753 valid bit in tree[0..7]*/
754 }
755 /* NOTREACHED*/
756 tl_assert(0);
757
758# undef DESCR
759# undef BYTE
760}
761
762__attribute__((unused))
763static Bool is_sane_Descr ( UShort descr ) {
764 return descr_to_validbits(descr) != 0;
765}
766
767static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
768 VG_(sprintf)(dst,
769 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
770 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
771 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
772 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
773 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
774 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
775 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
776 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
777 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
778 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
779 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
780 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
781 (Int)((descr & TREE_DESCR_64) ? 1 : 0),
782 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
783 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
784 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
785 );
786}
787static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
788 VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
789 (Int)((byte & 128) ? 1 : 0),
790 (Int)((byte & 64) ? 1 : 0),
791 (Int)((byte & 32) ? 1 : 0),
792 (Int)((byte & 16) ? 1 : 0),
793 (Int)((byte & 8) ? 1 : 0),
794 (Int)((byte & 4) ? 1 : 0),
795 (Int)((byte & 2) ? 1 : 0),
796 (Int)((byte & 1) ? 1 : 0)
797 );
798}
799
800static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
801 Word i;
802 UChar validbits = descr_to_validbits(descr);
803 HChar buf[128], buf2[128];
804 if (validbits == 0)
805 goto bad;
806 for (i = 0; i < 8; i++) {
807 if (validbits & (1<<i)) {
808 if (tree[i] == SVal_INVALID)
809 goto bad;
810 } else {
811 if (tree[i] != SVal_INVALID)
812 goto bad;
813 }
814 }
815 return True;
816 bad:
817 sprintf_Descr( buf, descr );
818 sprintf_Byte( buf2, validbits );
819 VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
820 VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
821 VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
822 for (i = 0; i < 8; i++)
823 VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
824 VG_(printf)("%s","}\n");
825 return 0;
826}
827
828static Bool is_sane_CacheLine ( CacheLine* cl )
829{
830 Word tno, cloff;
831
832 if (!cl) goto bad;
833
834 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
835 UShort descr = cl->descrs[tno];
836 SVal* tree = &cl->svals[cloff];
837 if (!is_sane_Descr_and_Tree(descr, tree))
838 goto bad;
839 }
840 tl_assert(cloff == N_LINE_ARANGE);
841 return True;
842 bad:
843 pp_CacheLine(cl);
844 return False;
845}
846
847static UShort normalise_tree ( /*MOD*/SVal* tree )
848{
849 UShort descr;
850 /* pre: incoming tree[0..7] does not have any invalid shvals, in
851 particular no zeroes. */
852 if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
853 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
854 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
855 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
856 tl_assert(0);
857
858 descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
859 | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
860 | TREE_DESCR_8_1 | TREE_DESCR_8_0;
861 /* build 16-bit layer */
862 if (tree[1] == tree[0]) {
863 tree[1] = SVal_INVALID;
864 descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
865 descr |= TREE_DESCR_16_0;
866 }
867 if (tree[3] == tree[2]) {
868 tree[3] = SVal_INVALID;
869 descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
870 descr |= TREE_DESCR_16_1;
871 }
872 if (tree[5] == tree[4]) {
873 tree[5] = SVal_INVALID;
874 descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
875 descr |= TREE_DESCR_16_2;
876 }
877 if (tree[7] == tree[6]) {
878 tree[7] = SVal_INVALID;
879 descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
880 descr |= TREE_DESCR_16_3;
881 }
882 /* build 32-bit layer */
883 if (tree[2] == tree[0]
884 && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
885 tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
886 descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
887 descr |= TREE_DESCR_32_0;
888 }
889 if (tree[6] == tree[4]
890 && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
891 tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
892 descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
893 descr |= TREE_DESCR_32_1;
894 }
895 /* build 64-bit layer */
896 if (tree[4] == tree[0]
897 && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
898 tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
899 descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
900 descr |= TREE_DESCR_64;
901 }
902 return descr;
903}
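/* Worked examples for normalise_tree: if all 8 leaves hold the same
   SVal, the 16-, 32- and then 64-bit merges all fire, tree[1..7] are
   set to SVal_INVALID and the returned descriptor is TREE_DESCR_64.
   If only tree[0] == tree[1], the descriptor is TREE_DESCR_16_0 plus
   TREE_DESCR_8_2 .. TREE_DESCR_8_7, and only tree[1] is invalidated. */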
904
905/* This takes a cacheline where all the data is at the leaves
906 (w8[..]) and builds a correctly normalised tree. */
907static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
908{
909 Word tno, cloff;
910 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
911 SVal* tree = &cl->svals[cloff];
912 cl->descrs[tno] = normalise_tree( tree );
913 }
914 tl_assert(cloff == N_LINE_ARANGE);
915 if (SCE_CACHELINE)
916 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
917 stats__cline_normalises++;
918}
919
920
921typedef struct { UChar count; SVal sval; } CountedSVal;
922
923static
924void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
925 /*OUT*/Word* dstUsedP,
926 Word nDst, CacheLine* src )
927{
928 Word tno, cloff, dstUsed;
929
930 tl_assert(nDst == N_LINE_ARANGE);
931 dstUsed = 0;
932
933 for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
934 UShort descr = src->descrs[tno];
935 SVal* tree = &src->svals[cloff];
936
937 /* sequentialise the tree described by (descr,tree). */
938# define PUT(_n,_v) \
939 do { dst[dstUsed ].count = (_n); \
940 dst[dstUsed++].sval = (_v); \
941 } while (0)
942
943 /* byte 0 */
944 if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
945 if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
946 if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
947 if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
948 /* byte 1 */
949 if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
950 /* byte 2 */
951 if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
952 if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
953 /* byte 3 */
954 if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
955 /* byte 4 */
956 if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
957 if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
958 if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
959 /* byte 5 */
960 if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
961 /* byte 6 */
962 if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
963 if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
964 /* byte 7 */
965 if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
966
967# undef PUT
968 /* END sequentialise the tree described by (descr,tree). */
969
970 }
971 tl_assert(cloff == N_LINE_ARANGE);
972 tl_assert(dstUsed <= nDst);
973
974 *dstUsedP = dstUsed;
975}
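/* Worked example: a tree whose descriptor is
   TREE_DESCR_32_0 | TREE_DESCR_16_2 | TREE_DESCR_8_6 | TREE_DESCR_8_7
   contributes four pairs to dst:
   (4, tree[0]), (2, tree[4]), (1, tree[6]), (1, tree[7]). */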
976
977/* Write the cacheline 'wix' to backing store. Where it ends up
978 is determined by its tag field. */
979static __attribute__((noinline)) void cacheline_wback ( UWord wix )
980{
981 Word i, j, k, m;
982 Addr tag;
983 SecMap* sm;
984 CacheLine* cl;
985 LineZ* lineZ;
986 LineF* lineF;
987 Word zix, fix, csvalsUsed;
988 CountedSVal csvals[N_LINE_ARANGE];
989 SVal sv;
990
991 if (0)
992 VG_(printf)("scache wback line %d\n", (Int)wix);
993
994 tl_assert(wix >= 0 && wix < N_WAY_NENT);
995
996 tag = cache_shmem.tags0[wix];
997 cl = &cache_shmem.lyns0[wix];
998
999 /* The cache line may have been invalidated; if so, ignore it. */
1000 if (!is_valid_scache_tag(tag))
1001 return;
1002
1003 /* Where are we going to put it? */
1004 sm = NULL;
1005 lineZ = NULL;
1006 lineF = NULL;
1007 zix = fix = -1;
1008
1009 /* find the Z line to write in and rcdec it or the associated F
1010 line. */
1011 find_Z_for_writing( &sm, &zix, tag );
1012
1013 tl_assert(sm);
1014 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1015 lineZ = &sm->linesZ[zix];
1016
1017 /* Generate the data to be stored */
1018 if (SCE_CACHELINE)
1019 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1020
1021 csvalsUsed = -1;
1022 sequentialise_CacheLine( csvals, &csvalsUsed,
1023 N_LINE_ARANGE, cl );
1024 tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1025 if (0) VG_(printf)("%lu ", csvalsUsed);
1026
1027 lineZ->dict[0] = lineZ->dict[1]
1028 = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1029
1030 /* i indexes actual shadow values, k is cursor in csvals */
1031 i = 0;
1032 for (k = 0; k < csvalsUsed; k++) {
1033
1034 sv = csvals[k].sval;
1035 if (SCE_SVALS)
1036 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1037 /* do we already have it? */
1038 if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1039 if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1040 if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1041 if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1042 /* no. look for a free slot. */
1043 if (SCE_SVALS)
1044 tl_assert(sv != SVal_INVALID);
1045 if (lineZ->dict[0]
1046 == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1047 if (lineZ->dict[1]
1048 == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1049 if (lineZ->dict[2]
1050 == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1051 if (lineZ->dict[3]
1052 == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1053 break; /* we'll have to use the f rep */
1054 dict_ok:
1055 m = csvals[k].count;
1056 if (m == 8) {
1057 write_twobit_array( lineZ->ix2s, i+0, j );
1058 write_twobit_array( lineZ->ix2s, i+1, j );
1059 write_twobit_array( lineZ->ix2s, i+2, j );
1060 write_twobit_array( lineZ->ix2s, i+3, j );
1061 write_twobit_array( lineZ->ix2s, i+4, j );
1062 write_twobit_array( lineZ->ix2s, i+5, j );
1063 write_twobit_array( lineZ->ix2s, i+6, j );
1064 write_twobit_array( lineZ->ix2s, i+7, j );
1065 i += 8;
1066 }
1067 else if (m == 4) {
1068 write_twobit_array( lineZ->ix2s, i+0, j );
1069 write_twobit_array( lineZ->ix2s, i+1, j );
1070 write_twobit_array( lineZ->ix2s, i+2, j );
1071 write_twobit_array( lineZ->ix2s, i+3, j );
1072 i += 4;
1073 }
1074 else if (m == 1) {
1075 write_twobit_array( lineZ->ix2s, i+0, j );
1076 i += 1;
1077 }
1078 else if (m == 2) {
1079 write_twobit_array( lineZ->ix2s, i+0, j );
1080 write_twobit_array( lineZ->ix2s, i+1, j );
1081 i += 2;
1082 }
1083 else {
1084 tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1085 }
1086
1087 }
1088
1089 if (LIKELY(i == N_LINE_ARANGE)) {
1090 /* Construction of the compressed representation was
1091 successful. */
1092 rcinc_LineZ(lineZ);
1093 stats__cache_Z_wbacks++;
1094 } else {
1095 /* Cannot use the compressed(z) representation. Use the full(f)
1096 rep instead. */
1097 tl_assert(i >= 0 && i < N_LINE_ARANGE);
1098 alloc_F_for_writing( sm, &fix );
1099 tl_assert(sm->linesF);
1100 tl_assert(sm->linesF_size > 0);
1101 tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1102 lineF = &sm->linesF[fix];
1103 tl_assert(!lineF->inUse);
1104 lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1105 lineZ->dict[1] = (SVal)fix;
1106 lineF->inUse = True;
1107 i = 0;
1108 for (k = 0; k < csvalsUsed; k++) {
1109 if (SCE_SVALS)
1110 tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1111 sv = csvals[k].sval;
1112 if (SCE_SVALS)
1113 tl_assert(sv != SVal_INVALID);
1114 for (m = csvals[k].count; m > 0; m--) {
1115 lineF->w64s[i] = sv;
1116 i++;
1117 }
1118 }
1119 tl_assert(i == N_LINE_ARANGE);
1120 rcinc_LineF(lineF);
1121 stats__cache_F_wbacks++;
1122 }
1123
1124 //if (anyShared)
1125 // sm->mbHasShared = True;
1126
1127 /* mb_tidy_one_cacheline(); */
1128}
1129
1130/* Fetch the cacheline 'wix' from the backing store. The tag
1131 associated with 'wix' is assumed to have already been filled in;
1132 hence that is used to determine where in the backing store to read
1133 from. */
1134static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1135{
1136 Word i;
1137 Addr tag;
1138 CacheLine* cl;
1139 LineZ* lineZ;
1140 LineF* lineF;
1141
1142 if (0)
1143 VG_(printf)("scache fetch line %d\n", (Int)wix);
1144
1145 tl_assert(wix >= 0 && wix < N_WAY_NENT);
1146
1147 tag = cache_shmem.tags0[wix];
1148 cl = &cache_shmem.lyns0[wix];
1149
1150 /* reject nonsense requests */
1151 tl_assert(is_valid_scache_tag(tag));
1152
1153 lineZ = NULL;
1154 lineF = NULL;
1155 find_ZF_for_reading( &lineZ, &lineF, tag );
1156 tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1157
1158 /* expand the data into the bottom layer of the tree, then get
1159 cacheline_normalise to build the descriptor array. */
1160 if (lineF) {
1161 tl_assert(lineF->inUse);
1162 for (i = 0; i < N_LINE_ARANGE; i++) {
1163 cl->svals[i] = lineF->w64s[i];
1164 }
1165 stats__cache_F_fetches++;
1166 } else {
1167 for (i = 0; i < N_LINE_ARANGE; i++) {
1168 SVal sv;
1169 UWord ix = read_twobit_array( lineZ->ix2s, i );
1170 /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1171 sv = lineZ->dict[ix];
1172 tl_assert(sv != SVal_INVALID);
1173 cl->svals[i] = sv;
1174 }
1175 stats__cache_Z_fetches++;
1176 }
1177 normalise_CacheLine( cl );
1178}
1179
1180static void shmem__invalidate_scache ( void ) {
1181 Word wix;
1182 if (0) VG_(printf)("%s","scache inval\n");
1183 tl_assert(!is_valid_scache_tag(1));
1184 for (wix = 0; wix < N_WAY_NENT; wix++) {
1185 cache_shmem.tags0[wix] = 1/*INVALID*/;
1186 }
1187 stats__cache_invals++;
1188}
1189
1190static void shmem__flush_and_invalidate_scache ( void ) {
1191 Word wix;
1192 Addr tag;
1193 if (0) VG_(printf)("%s","scache flush and invalidate\n");
1194 tl_assert(!is_valid_scache_tag(1));
1195 for (wix = 0; wix < N_WAY_NENT; wix++) {
1196 tag = cache_shmem.tags0[wix];
1197 if (tag == 1/*INVALID*/) {
1198 /* already invalid; nothing to do */
1199 } else {
1200 tl_assert(is_valid_scache_tag(tag));
1201 cacheline_wback( wix );
1202 }
1203 cache_shmem.tags0[wix] = 1/*INVALID*/;
1204 }
1205 stats__cache_flushes++;
1206 stats__cache_invals++;
1207}
1208
1209
1210static inline Bool aligned16 ( Addr a ) {
1211 return 0 == (a & 1);
1212}
1213static inline Bool aligned32 ( Addr a ) {
1214 return 0 == (a & 3);
1215}
1216static inline Bool aligned64 ( Addr a ) {
1217 return 0 == (a & 7);
1218}
1219static inline UWord get_cacheline_offset ( Addr a ) {
1220 return (UWord)(a & (N_LINE_ARANGE - 1));
1221}
1222static inline Addr cacheline_ROUNDUP ( Addr a ) {
1223 return ROUNDUP(a, N_LINE_ARANGE);
1224}
1225static inline Addr cacheline_ROUNDDN ( Addr a ) {
1226 return ROUNDDN(a, N_LINE_ARANGE);
1227}
1228static inline UWord get_treeno ( Addr a ) {
1229 return get_cacheline_offset(a) >> 3;
1230}
1231static inline UWord get_tree_offset ( Addr a ) {
1232 return a & 7;
1233}
1234
1235static __attribute__((noinline))
1236 CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
1237static inline CacheLine* get_cacheline ( Addr a )
1238{
1239 /* tag is 'a' with the in-line offset masked out,
1240 eg a[31]..a[4] 0000 */
1241 Addr tag = a & ~(N_LINE_ARANGE - 1);
1242 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1243 stats__cache_totrefs++;
1244 if (LIKELY(tag == cache_shmem.tags0[wix])) {
1245 return &cache_shmem.lyns0[wix];
1246 } else {
1247 return get_cacheline_MISS( a );
1248 }
1249}
1250
1251static __attribute__((noinline))
1252 CacheLine* get_cacheline_MISS ( Addr a )
1253{
1254 /* tag is 'a' with the in-line offset masked out,
1255 eg a[31]..a[4] 0000 */
1256
1257 CacheLine* cl;
1258 Addr* tag_old_p;
1259 Addr tag = a & ~(N_LINE_ARANGE - 1);
1260 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1261
1262 tl_assert(tag != cache_shmem.tags0[wix]);
1263
1264 /* Dump the old line into the backing store. */
1265 stats__cache_totmisses++;
1266
1267 cl = &cache_shmem.lyns0[wix];
1268 tag_old_p = &cache_shmem.tags0[wix];
1269
1270 if (is_valid_scache_tag( *tag_old_p )) {
1271 /* EXPENSIVE and REDUNDANT: callee does it */
1272 if (SCE_CACHELINE)
1273 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1274 cacheline_wback( wix );
1275 }
1276 /* and reload the new one */
1277 *tag_old_p = tag;
1278 cacheline_fetch( wix );
1279 if (SCE_CACHELINE)
1280 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1281 return cl;
1282}
1283
1284static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1285 stats__cline_64to32pulldown++;
1286 switch (toff) {
1287 case 0: case 4:
1288 tl_assert(descr & TREE_DESCR_64);
1289 tree[4] = tree[0];
1290 descr &= ~TREE_DESCR_64;
1291 descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1292 break;
1293 default:
1294 tl_assert(0);
1295 }
1296 return descr;
1297}
1298
1299static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1300 stats__cline_32to16pulldown++;
1301 switch (toff) {
1302 case 0: case 2:
1303 if (!(descr & TREE_DESCR_32_0)) {
1304 descr = pulldown_to_32(tree, 0, descr);
1305 }
1306 tl_assert(descr & TREE_DESCR_32_0);
1307 tree[2] = tree[0];
1308 descr &= ~TREE_DESCR_32_0;
1309 descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1310 break;
1311 case 4: case 6:
1312 if (!(descr & TREE_DESCR_32_1)) {
1313 descr = pulldown_to_32(tree, 4, descr);
1314 }
1315 tl_assert(descr & TREE_DESCR_32_1);
1316 tree[6] = tree[4];
1317 descr &= ~TREE_DESCR_32_1;
1318 descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1319 break;
1320 default:
1321 tl_assert(0);
1322 }
1323 return descr;
1324}
1325
1326static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1327 stats__cline_16to8pulldown++;
1328 switch (toff) {
1329 case 0: case 1:
1330 if (!(descr & TREE_DESCR_16_0)) {
1331 descr = pulldown_to_16(tree, 0, descr);
1332 }
1333 tl_assert(descr & TREE_DESCR_16_0);
1334 tree[1] = tree[0];
1335 descr &= ~TREE_DESCR_16_0;
1336 descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1337 break;
1338 case 2: case 3:
1339 if (!(descr & TREE_DESCR_16_1)) {
1340 descr = pulldown_to_16(tree, 2, descr);
1341 }
1342 tl_assert(descr & TREE_DESCR_16_1);
1343 tree[3] = tree[2];
1344 descr &= ~TREE_DESCR_16_1;
1345 descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1346 break;
1347 case 4: case 5:
1348 if (!(descr & TREE_DESCR_16_2)) {
1349 descr = pulldown_to_16(tree, 4, descr);
1350 }
1351 tl_assert(descr & TREE_DESCR_16_2);
1352 tree[5] = tree[4];
1353 descr &= ~TREE_DESCR_16_2;
1354 descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1355 break;
1356 case 6: case 7:
1357 if (!(descr & TREE_DESCR_16_3)) {
1358 descr = pulldown_to_16(tree, 6, descr);
1359 }
1360 tl_assert(descr & TREE_DESCR_16_3);
1361 tree[7] = tree[6];
1362 descr &= ~TREE_DESCR_16_3;
1363 descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1364 break;
1365 default:
1366 tl_assert(0);
1367 }
1368 return descr;
1369}
1370
1371
1372static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1373 UShort mask;
1374 switch (toff) {
1375 case 0:
1376 mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1377 tl_assert( (descr & mask) == mask );
1378 descr &= ~mask;
1379 descr |= TREE_DESCR_16_0;
1380 break;
1381 case 2:
1382 mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1383 tl_assert( (descr & mask) == mask );
1384 descr &= ~mask;
1385 descr |= TREE_DESCR_16_1;
1386 break;
1387 case 4:
1388 mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1389 tl_assert( (descr & mask) == mask );
1390 descr &= ~mask;
1391 descr |= TREE_DESCR_16_2;
1392 break;
1393 case 6:
1394 mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1395 tl_assert( (descr & mask) == mask );
1396 descr &= ~mask;
1397 descr |= TREE_DESCR_16_3;
1398 break;
1399 default:
1400 tl_assert(0);
1401 }
1402 return descr;
1403}
1404
1405static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1406 UShort mask;
1407 switch (toff) {
1408 case 0:
1409 if (!(descr & TREE_DESCR_16_0))
1410 descr = pullup_descr_to_16(descr, 0);
1411 if (!(descr & TREE_DESCR_16_1))
1412 descr = pullup_descr_to_16(descr, 2);
1413 mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1414 tl_assert( (descr & mask) == mask );
1415 descr &= ~mask;
1416 descr |= TREE_DESCR_32_0;
1417 break;
1418 case 4:
1419 if (!(descr & TREE_DESCR_16_2))
1420 descr = pullup_descr_to_16(descr, 4);
1421 if (!(descr & TREE_DESCR_16_3))
1422 descr = pullup_descr_to_16(descr, 6);
1423 mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1424 tl_assert( (descr & mask) == mask );
1425 descr &= ~mask;
1426 descr |= TREE_DESCR_32_1;
1427 break;
1428 default:
1429 tl_assert(0);
1430 }
1431 return descr;
1432}
1433
1434static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1435 switch (toff) {
1436 case 0: case 4:
1437 return 0 != (descr & TREE_DESCR_64);
1438 default:
1439 tl_assert(0);
1440 }
1441}
1442
1443static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1444 switch (toff) {
1445 case 0:
1446 return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1447 case 2:
1448 return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1449 case 4:
1450 return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1451 case 6:
1452 return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1453 default:
1454 tl_assert(0);
1455 }
1456}
1457
1458/* ------------ Cache management ------------ */
1459
1460static void zsm_flush_cache ( void )
1461{
1462 shmem__flush_and_invalidate_scache();
1463}
1464
1465
1466static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1467{
1468 tl_assert( sizeof(UWord) == sizeof(Addr) );
1469
1470 rcinc = p_rcinc;
1471 rcdec = p_rcdec;
1472
1473 tl_assert(map_shmem == NULL);
1474 map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1475 HG_(free),
1476 NULL/*unboxed UWord cmp*/);
1477 tl_assert(map_shmem != NULL);
1478 shmem__invalidate_scache();
1479
1480 /* a SecMap must contain an integral number of CacheLines */
1481 tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1482 /* also ... a CacheLine holds an integral number of trees */
1483 tl_assert(0 == (N_LINE_ARANGE % 8));
1484}
1485
1486/////////////////////////////////////////////////////////////////
1487/////////////////////////////////////////////////////////////////
1488// //
1489// SECTION END compressed shadow memory //
1490// //
1491/////////////////////////////////////////////////////////////////
1492/////////////////////////////////////////////////////////////////
1493
1494
1495
1496/////////////////////////////////////////////////////////////////
1497/////////////////////////////////////////////////////////////////
1498// //
1499// SECTION BEGIN vts primitives //
1500// //
1501/////////////////////////////////////////////////////////////////
1502/////////////////////////////////////////////////////////////////
1503
1504#ifndef __HB_VTS_H
1505#define __HB_VTS_H
1506
1507/* VtsIDs can't exceed 30 bits, since they have to be packed into the
1508 lowest 30 bits of an SVal. */
1509typedef UInt VtsID;
1510#define VtsID_INVALID 0xFFFFFFFF
1511
1512/* A VTS contains .ts, its vector clock, and also .id, a field to hold
1513 a backlink for the caller's convenience. Since we have no idea
1514 what to set that to in the library, it always gets set to
1515 VtsID_INVALID. */
1516typedef
1517 struct {
1518 VtsID id;
1519 XArray* ts; /* XArray* ScalarTS(abstract) */
1520 }
1521 VTS;
1522
1523
1524/* Create a new, empty VTS. */
1525VTS* VTS__new ( void );
1526
1527/* Delete this VTS in its entirety. */
1528void VTS__delete ( VTS* vts );
1529
1530/* Create a new singleton VTS. */
1531VTS* VTS__singleton ( Thr* thr, ULong tym );
1532
1533/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1534 not modified. */
1535VTS* VTS__tick ( Thr* me, VTS* vts );
1536
1537/* Return a new VTS constructed as the join (max) of the 2 args.
1538 Neither arg is modified. */
1539VTS* VTS__join ( VTS* a, VTS* b );
1540
1541/* Compute the partial ordering relation of the two args. */
1542typedef
1543 enum { POrd_EQ=4, POrd_LT, POrd_GT, POrd_UN }
1544 POrd;
1545
1546POrd VTS__cmp ( VTS* a, VTS* b );
1547
1548/* Compute an arbitrary structural (total) ordering on the two args,
1549 based on their VCs, so they can be looked up in a table, tree, etc.
1550 Returns -1, 0 or 1. */
1551Word VTS__cmp_structural ( VTS* a, VTS* b );
1552
1553/* Debugging only. Display the given VTS in the buffer. */
1554void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1555
1556/* Debugging only. Return vts[index], so to speak. */
 1557ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
 1558
1559#endif /* ! __HB_VTS_H */
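/* Worked example of the operations declared above. A VTS maps each
   Thr* to a scalar timestamp, with absent entries meaning zero
   (threads shown symbolically):

      a = { T1:3, T2:5 }        b = { T1:4, T3:1 }

      VTS__tick(T2, a)   == { T1:3, T2:6 }
      VTS__join(a, b)    == { T1:4, T2:5, T3:1 }    (pointwise max)
      VTS__cmp(a, b)     == POrd_UN                 (neither dominates)
      VTS__cmp(a, VTS__join(a, b)) == POrd_LT
*/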
1560
1561
1562/*--------------- to do with Vector Timestamps ---------------*/
1563
1564/* Scalar Timestamp */
1565typedef
1566 struct {
1567 Thr* thr;
1568 ULong tym;
1569 }
1570 ScalarTS;
1571
1572
1573static Bool is_sane_VTS ( VTS* vts )
1574{
1575 UWord i, n;
1576 ScalarTS *st1, *st2;
1577 if (!vts) return False;
1578 if (!vts->ts) return False;
1579 n = VG_(sizeXA)( vts->ts );
1580 if (n >= 2) {
1581 for (i = 0; i < n-1; i++) {
1582 st1 = VG_(indexXA)( vts->ts, i );
1583 st2 = VG_(indexXA)( vts->ts, i+1 );
1584 if (st1->thr >= st2->thr)
1585 return False;
1586 if (st1->tym == 0 || st2->tym == 0)
1587 return False;
1588 }
1589 }
1590 return True;
1591}
1592
1593
1594/* Create a new, empty VTS.
1595*/
1596VTS* VTS__new ( void )
1597{
1598 VTS* vts;
1599 vts = HG_(zalloc)( "libhb.VTS__new.1", sizeof(VTS) );
1600 tl_assert(vts);
1601 vts->id = VtsID_INVALID;
1602 vts->ts = VG_(newXA)( HG_(zalloc), "libhb.VTS__new.2",
1603 HG_(free), sizeof(ScalarTS) );
1604 tl_assert(vts->ts);
1605 return vts;
1606}
1607
1608
1609/* Delete this VTS in its entirety.
1610*/
1611void VTS__delete ( VTS* vts )
1612{
1613 tl_assert(vts);
1614 tl_assert(vts->ts);
1615 VG_(deleteXA)( vts->ts );
1616 HG_(free)(vts);
1617}
1618
1619
1620/* Create a new singleton VTS.
1621*/
1622VTS* VTS__singleton ( Thr* thr, ULong tym ) {
1623 ScalarTS st;
1624 VTS* vts;
1625 tl_assert(thr);
1626 tl_assert(tym >= 1);
1627 vts = VTS__new();
1628 st.thr = thr;
1629 st.tym = tym;
1630 VG_(addToXA)( vts->ts, &st );
1631 return vts;
1632}
1633
1634
1635/* Return a new VTS in which vts[me]++, so to speak. 'vts' itself is
1636 not modified.
1637*/
1638VTS* VTS__tick ( Thr* me, VTS* vts )
1639{
1640 ScalarTS* here = NULL;
1641 ScalarTS tmp;
1642 VTS* res;
1643 Word i, n;
1644 tl_assert(me);
1645 tl_assert(is_sane_VTS(vts));
1646 //if (0) VG_(printf)("tick vts thrno %ld szin %d\n",
1647 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(vts) );
1648 res = VTS__new();
1649 n = VG_(sizeXA)( vts->ts );
1650
1651 /* main loop doesn't handle zero-entry case correctly, so
1652 special-case it. */
1653 if (n == 0) {
1654 tmp.thr = me;
1655 tmp.tym = 1;
1656 VG_(addToXA)( res->ts, &tmp );
1657 tl_assert(is_sane_VTS(res));
1658 return res;
1659 }
1660
1661 for (i = 0; i < n; i++) {
1662 here = VG_(indexXA)( vts->ts, i );
1663 if (me < here->thr) {
1664 /* We just went past 'me', without seeing it. */
1665 tmp.thr = me;
1666 tmp.tym = 1;
1667 VG_(addToXA)( res->ts, &tmp );
1668 tmp = *here;
1669 VG_(addToXA)( res->ts, &tmp );
1670 i++;
1671 break;
1672 }
1673 else if (me == here->thr) {
1674 tmp = *here;
1675 tmp.tym++;
1676 VG_(addToXA)( res->ts, &tmp );
1677 i++;
1678 break;
1679 }
1680 else /* me > here->thr */ {
1681 tmp = *here;
1682 VG_(addToXA)( res->ts, &tmp );
1683 }
1684 }
1685 tl_assert(i >= 0 && i <= n);
1686 if (i == n && here && here->thr < me) {
1687 tmp.thr = me;
1688 tmp.tym = 1;
1689 VG_(addToXA)( res->ts, &tmp );
1690 } else {
1691 for (/*keepgoing*/; i < n; i++) {
1692 here = VG_(indexXA)( vts->ts, i );
1693 tmp = *here;
1694 VG_(addToXA)( res->ts, &tmp );
1695 }
1696 }
1697 tl_assert(is_sane_VTS(res));
1698 //if (0) VG_(printf)("tick vts thrno %ld szou %d\n",
1699 // (Word)me->errmsg_index, (Int)VG_(sizeXA)(res) );
1700 return res;
1701}
1702
1703
1704/* Return a new VTS constructed as the join (max) of the 2 args.
1705 Neither arg is modified.
1706*/
1707VTS* VTS__join ( VTS* a, VTS* b )
1708{
1709 Word ia, ib, useda, usedb;
1710 ULong tyma, tymb, tymMax;
1711 Thr* thr;
1712 VTS* res;
1713 ScalarTS *tmpa, *tmpb;
1714
1715 tl_assert(a && a->ts);
1716 tl_assert(b && b->ts);
1717 useda = VG_(sizeXA)( a->ts );
1718 usedb = VG_(sizeXA)( b->ts );
1719
1720 res = VTS__new();
1721 ia = ib = 0;
1722
1723 while (1) {
1724
1725 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1726 from a and b in order, where thr is the next Thr*
1727 occurring in either a or b, and tyma/b are the relevant
1728 scalar timestamps, taking into account implicit zeroes. */
1729 tl_assert(ia >= 0 && ia <= useda);
1730 tl_assert(ib >= 0 && ib <= usedb);
1731 tmpa = tmpb = NULL;
1732
1733 if (ia == useda && ib == usedb) {
1734 /* both empty - done */
1735 break;
1736 }
1737 else
1738 if (ia == useda && ib != usedb) {
1739 /* a empty, use up b */
1740 tmpb = VG_(indexXA)( b->ts, ib );
1741 thr = tmpb->thr;
1742 tyma = 0;
1743 tymb = tmpb->tym;
1744 ib++;
1745 }
1746 else
1747 if (ia != useda && ib == usedb) {
1748 /* b empty, use up a */
1749 tmpa = VG_(indexXA)( a->ts, ia );
1750 thr = tmpa->thr;
1751 tyma = tmpa->tym;
1752 tymb = 0;
1753 ia++;
1754 }
1755 else {
1756 /* both not empty; extract lowest-Thr*'d triple */
1757 tmpa = VG_(indexXA)( a->ts, ia );
1758 tmpb = VG_(indexXA)( b->ts, ib );
1759 if (tmpa->thr < tmpb->thr) {
1760 /* a has the lowest unconsidered Thr* */
1761 thr = tmpa->thr;
1762 tyma = tmpa->tym;
1763 tymb = 0;
1764 ia++;
1765 }
1766 else
1767 if (tmpa->thr > tmpb->thr) {
1768 /* b has the lowest unconsidered Thr* */
1769 thr = tmpb->thr;
1770 tyma = 0;
1771 tymb = tmpb->tym;
1772 ib++;
1773 } else {
1774 /* they both next mention the same Thr* */
1775 tl_assert(tmpa->thr == tmpb->thr);
1776 thr = tmpa->thr; /* == tmpb->thr */
1777 tyma = tmpa->tym;
1778 tymb = tmpb->tym;
1779 ia++;
1780 ib++;
1781 }
1782 }
1783
1784 /* having laboriously determined (thr, tyma, tymb), do something
1785 useful with it. */
1786 tymMax = tyma > tymb ? tyma : tymb;
1787 if (tymMax > 0) {
1788 ScalarTS st;
1789 st.thr = thr;
1790 st.tym = tymMax;
1791 VG_(addToXA)( res->ts, &st );
1792 }
1793
1794 }
1795
1796 tl_assert(is_sane_VTS( res ));
1797
1798 return res;
1799}
1800
1801
1802/* Compute the partial ordering relation of the two args.
1803*/
1804POrd VTS__cmp ( VTS* a, VTS* b )
1805{
1806 Word ia, ib, useda, usedb;
1807 ULong tyma, tymb;
1808 Thr* thr;
1809 ScalarTS *tmpa, *tmpb;
1810
1811 Bool all_leq = True;
1812 Bool all_geq = True;
1813
1814 tl_assert(a && a->ts);
1815 tl_assert(b && b->ts);
1816 useda = VG_(sizeXA)( a->ts );
1817 usedb = VG_(sizeXA)( b->ts );
1818
1819 ia = ib = 0;
1820
1821 while (1) {
1822
1823 /* This logic is to enumerate triples (thr, tyma, tymb) drawn
1824 from a and b in order, where thr is the next Thr*
1825 occurring in either a or b, and tyma/b are the relevant
1826 scalar timestamps, taking into account implicit zeroes. */
1827 tl_assert(ia >= 0 && ia <= useda);
1828 tl_assert(ib >= 0 && ib <= usedb);
1829 tmpa = tmpb = NULL;
1830
1831 if (ia == useda && ib == usedb) {
1832 /* both empty - done */
1833 break;
1834 }
1835 else
1836 if (ia == useda && ib != usedb) {
1837 /* a empty, use up b */
1838 tmpb = VG_(indexXA)( b->ts, ib );
1839 thr = tmpb->thr;
1840 tyma = 0;
1841 tymb = tmpb->tym;
1842 ib++;
1843 }
1844 else
1845 if (ia != useda && ib == usedb) {
1846 /* b empty, use up a */
1847 tmpa = VG_(indexXA)( a->ts, ia );
1848 thr = tmpa->thr;
1849 tyma = tmpa->tym;
1850 tymb = 0;
1851 ia++;
1852 }
1853 else {
1854 /* both not empty; extract lowest-Thr*'d triple */
1855 tmpa = VG_(indexXA)( a->ts, ia );
1856 tmpb = VG_(indexXA)( b->ts, ib );
1857 if (tmpa->thr < tmpb->thr) {
1858 /* a has the lowest unconsidered Thr* */
1859 thr = tmpa->thr;
1860 tyma = tmpa->tym;
1861 tymb = 0;
1862 ia++;
1863 }
1864 else
1865 if (tmpa->thr > tmpb->thr) {
1866 /* b has the lowest unconsidered Thr* */
1867 thr = tmpb->thr;
1868 tyma = 0;
1869 tymb = tmpb->tym;
1870 ib++;
1871 } else {
1872 /* they both next mention the same Thr* */
1873 tl_assert(tmpa->thr == tmpb->thr);
1874 thr = tmpa->thr; /* == tmpb->thr */
1875 tyma = tmpa->tym;
1876 tymb = tmpb->tym;
1877 ia++;
1878 ib++;
1879 }
1880 }
1881
1882 /* having laboriously determined (thr, tyma, tymb), do something
1883 useful with it. */
1884 if (tyma < tymb)
1885 all_geq = False;
1886 if (tyma > tymb)
1887 all_leq = False;
1888 }
1889
1890 if (all_leq && all_geq)
1891 return POrd_EQ;
1892 /* now we know they aren't equal, so either all_leq or all_geq or
1893 both are false. */
1894 if (all_leq)
1895 return POrd_LT;
1896 if (all_geq)
1897 return POrd_GT;
1898 /* hmm, neither all_geq or all_leq. This means unordered. */
1899 return POrd_UN;
1900}
1901
1902
1903/* Compute an arbitrary structural (total) ordering on the two args,
1904 based on their VCs, so they can be looked up in a table, tree, etc.
1905 Returns -1, 0 or 1. (really just 'deriving Ord' :-)
1906*/
1907Word VTS__cmp_structural ( VTS* a, VTS* b )
1908{
1909 /* We just need to generate an arbitrary total ordering based on
1910 a->ts and b->ts. Preferably do it in a way which comes across likely
1911 differences relatively quickly. */
1912 Word i, useda, usedb;
1913 ScalarTS *tmpa, *tmpb;
1914
1915 tl_assert(a && a->ts);
1916 tl_assert(b && b->ts);
1917 useda = VG_(sizeXA)( a->ts );
1918 usedb = VG_(sizeXA)( b->ts );
1919
1920 if (useda < usedb) return -1;
1921 if (useda > usedb) return 1;
1922
1923 /* Same length vectors, so let's step through them together. */
1924 tl_assert(useda == usedb);
1925 for (i = 0; i < useda; i++) {
1926 tmpa = VG_(indexXA)( a->ts, i );
1927 tmpb = VG_(indexXA)( b->ts, i );
1928 if (tmpa->tym < tmpb->tym) return -1;
1929 if (tmpa->tym > tmpb->tym) return 1;
1930 if (tmpa->thr < tmpb->thr) return -1;
1931 if (tmpa->thr > tmpb->thr) return 1;
1932 }
1933
1934 /* They're identical. */
1935 return 0;
1936}
1937
1938
1939/* Debugging only. Display the given VTS in the buffer.
1940*/
1941void VTS__show ( HChar* buf, Int nBuf, VTS* vts ) {
1942 ScalarTS* st;
1943 HChar unit[64];
1944 Word i, n;
1945 Int avail = nBuf;
1946 tl_assert(vts && vts->ts);
1947 tl_assert(nBuf > 16);
1948 buf[0] = '[';
1949 buf[1] = 0;
1950 n = VG_(sizeXA)( vts->ts );
1951 for (i = 0; i < n; i++) {
1952 tl_assert(avail >= 40);
1953 st = VG_(indexXA)( vts->ts, i );
1954 VG_(memset)(unit, 0, sizeof(unit));
1955 VG_(sprintf)(unit, i < n-1 ? "%p:%lld " : "%p:%lld",
1956 st->thr, st->tym);
1957 if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
1958 VG_(strcat)(buf, " ...]");
1959 buf[nBuf-1] = 0;
1960 return;
1961 }
1962 VG_(strcat)(buf, unit);
1963 avail -= VG_(strlen)(unit);
1964 }
1965 VG_(strcat)(buf, "]");
1966 buf[nBuf-1] = 0;
1967}
1968
1969
1970/* Debugging only. Return vts[index], so to speak.
1971*/
1972ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx ) {
1973 UWord i, n;
1974 tl_assert(vts && vts->ts);
1975 n = VG_(sizeXA)( vts->ts );
1976 for (i = 0; i < n; i++) {
1977 ScalarTS* st = VG_(indexXA)( vts->ts, i );
1978 if (st->thr == idx)
1979 return st->tym;
1980 }
1981 return 0;
1982}
1983
1984
1985/////////////////////////////////////////////////////////////////
1986/////////////////////////////////////////////////////////////////
1987// //
1988// SECTION END vts primitives //
1989// //
1990/////////////////////////////////////////////////////////////////
1991/////////////////////////////////////////////////////////////////
1992
1993
1994
1995/////////////////////////////////////////////////////////////////
1996/////////////////////////////////////////////////////////////////
1997// //
1998// SECTION BEGIN main library //
1999// //
2000/////////////////////////////////////////////////////////////////
2001/////////////////////////////////////////////////////////////////
2002
2003
2004/////////////////////////////////////////////////////////
2005// //
2006// VTS set //
2007// //
2008/////////////////////////////////////////////////////////
2009
2010static WordFM* /* VTS* void void */ vts_set = NULL;
2011
2012static void vts_set_init ( void )
2013{
2014 tl_assert(!vts_set);
2015 vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2016 HG_(free),
2017 (Word(*)(UWord,UWord))VTS__cmp_structural );
2018 tl_assert(vts_set);
2019}
2020
2021/* Given a newly made VTS, look in vts_set to see if we already have
2022 an identical one. If yes, free up this one and return instead a
2023 pointer to the existing one. If no, add this one to the set and
2024 return the same pointer. Caller differentiates the two cases by
2025 comparing returned pointer with the supplied one (although that
2026 does require that the supplied VTS is not already in the set).
2027*/
2028static VTS* vts_set__find_and_dealloc__or_add ( VTS* cand )
2029{
2030 UWord keyW, valW;
2031 /* lookup cand (by value) */
2032 if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2033 /* found it */
2034 tl_assert(valW == 0);
2035 /* if this fails, cand (by ref) was already present (!) */
2036 tl_assert(keyW != (UWord)cand);
2037 VTS__delete(cand);
2038 return (VTS*)keyW;
2039 } else {
2040 /* not present. Add and return pointer to same. */
2041 VG_(addToFM)( vts_set, (UWord)cand, 0/*val is unused*/ );
2042 return cand;
2043 }
2044}
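/* A minimal usage sketch for the interning function above; 'thr' and
   'someVTS' are placeholders.  Kept inside #if 0, so it is never
   compiled. */
#if 0
   VTS* fresh = VTS__tick( thr, someVTS );  /* some newly built VTS */
   VTS* uniq  = vts_set__find_and_dealloc__or_add( fresh );
   if (uniq != fresh) {
      /* An identical VTS was already interned: 'fresh' has been
         deleted, and 'uniq' points at the canonical copy. */
   } else {
      /* 'fresh' itself is now the canonical, interned copy. */
   }
#endif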
2045
2046
2047/////////////////////////////////////////////////////////
2048// //
2049// VTS table //
2050// //
2051/////////////////////////////////////////////////////////
2052
2053static void VtsID__invalidate_caches ( void ); /* fwds */
2054
2055/* A type to hold VTS table entries. Invariants:
2056 If .vts == NULL, then this entry is not in use, so:
2057 - .rc == 0
2058 - this entry is on the freelist (unfortunately, does not imply
2059        any constraints on value for .freelink)
2060 If .vts != NULL, then this entry is in use:
2061 - .vts is findable in vts_set
2062 - .vts->id == this entry number
2063 - no specific value for .rc (even 0 is OK)
2064     - this entry is not on freelist, so .freelink == VtsID_INVALID
2065*/
2066typedef
2067 struct {
2068 VTS* vts; /* vts, in vts_set */
2069 UWord rc; /* reference count - enough for entire aspace */
2070 VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2071 }
2072 VtsTE;
2073
2074/* The VTS table. */
2075static XArray* /* of VtsTE */ vts_tab = NULL;
2076
2077/* An index into the VTS table, indicating the start of the list of
2078 free (available for use) entries. If the list is empty, this is
2079 VtsID_INVALID. */
2080static VtsID vts_tab_freelist = VtsID_INVALID;
2081
2082/* Do a GC of vts_tab when the freelist becomes empty AND the size of
2083 vts_tab equals or exceeds this size. After GC, the value here is
2084 set appropriately so as to check for the next GC point. */
2085static Word vts_next_GC_at = 1000;
2086
2087static void vts_tab_init ( void )
2088{
2089 vts_tab
2090 = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2091 HG_(free), sizeof(VtsTE) );
2092 vts_tab_freelist
2093 = VtsID_INVALID;
2094 tl_assert(vts_tab);
2095}
2096
2097/* Add ii to the free list, checking that it looks out-of-use. */
2098static void add_to_free_list ( VtsID ii )
2099{
2100 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2101 tl_assert(ie->vts == NULL);
2102 tl_assert(ie->rc == 0);
2103 tl_assert(ie->freelink == VtsID_INVALID);
2104 ie->freelink = vts_tab_freelist;
2105 vts_tab_freelist = ii;
2106}
2107
2108/* Get an entry from the free list. This will return VtsID_INVALID if
2109 the free list is empty. */
2110static VtsID get_from_free_list ( void )
2111{
2112 VtsID ii;
2113 VtsTE* ie;
2114 if (vts_tab_freelist == VtsID_INVALID)
2115 return VtsID_INVALID;
2116 ii = vts_tab_freelist;
2117 ie = VG_(indexXA)( vts_tab, ii );
2118 tl_assert(ie->vts == NULL);
2119 tl_assert(ie->rc == 0);
2120 vts_tab_freelist = ie->freelink;
2121 return ii;
2122}
2123
2124/* Produce a new VtsID that can be used, either by getting it from
2125 the freelist, or, if that is empty, by expanding vts_tab. */
2126static VtsID get_new_VtsID ( void )
2127{
2128 VtsID ii;
2129 VtsTE te;
2130 ii = get_from_free_list();
2131 if (ii != VtsID_INVALID)
2132 return ii;
2133 te.vts = NULL;
2134 te.rc = 0;
2135 te.freelink = VtsID_INVALID;
2136 ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2137 return ii;
2138}
2139
2140
2141/* Indirect callback from lib_zsm. */
2142static void VtsID__rcinc ( VtsID ii )
2143{
2144 VtsTE* ie;
2145 /* VG_(indexXA) does a range check for us */
2146 ie = VG_(indexXA)( vts_tab, ii );
2147 tl_assert(ie->vts); /* else it's not in use */
2148 tl_assert(ie->rc < ~0UL); /* else we can't continue */
2149 tl_assert(ie->vts->id == ii);
2150 ie->rc++;
2151}
2152
2153/* Indirect callback from lib_zsm. */
2154static void VtsID__rcdec ( VtsID ii )
2155{
2156 VtsTE* ie;
2157 /* VG_(indexXA) does a range check for us */
2158 ie = VG_(indexXA)( vts_tab, ii );
2159 tl_assert(ie->vts); /* else it's not in use */
2160 tl_assert(ie->rc > 0); /* else RC snafu */
2161 tl_assert(ie->vts->id == ii);
2162 ie->rc--;
2163}
2164
2165
2166/* Look up 'cand' in our collection of VTSs. If present, deallocate
2167 it and return the VtsID for the pre-existing version. If not
2168 present, add it to both vts_tab and vts_set, allocate a fresh VtsID
2169 for it, and return that. */
2170static VtsID vts_tab__find_and_dealloc__or_add ( VTS* cand )
2171{
2172 VTS* auld;
2173 tl_assert(cand->id == VtsID_INVALID);
2174 auld = vts_set__find_and_dealloc__or_add(cand);
2175 if (auld != cand) {
2176 /* We already have an Aulde one. Use that. */
2177 VtsTE* ie;
2178 tl_assert(auld->id != VtsID_INVALID);
2179 ie = VG_(indexXA)( vts_tab, auld->id );
2180 tl_assert(ie->vts == auld);
2181 return auld->id;
2182 } else {
2183 VtsID ii = get_new_VtsID();
2184 VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2185 ie->vts = cand;
2186 ie->rc = 0;
2187 ie->freelink = VtsID_INVALID;
2188 cand->id = ii;
2189 return ii;
2190 }
2191}
2192
2193
2194static void show_vts_stats ( HChar* caller )
2195{
2196 UWord nSet, nTab, nLive;
2197 ULong totrc;
2198 UWord n, i;
2199 nSet = VG_(sizeFM)( vts_set );
2200 nTab = VG_(sizeXA)( vts_tab );
2201 totrc = 0;
2202 nLive = 0;
2203 n = VG_(sizeXA)( vts_tab );
2204 for (i = 0; i < n; i++) {
2205 VtsTE* ie = VG_(indexXA)( vts_tab, i );
2206 if (ie->vts) {
2207 nLive++;
2208 totrc += (ULong)ie->rc;
2209 } else {
2210 tl_assert(ie->rc == 0);
2211 }
2212 }
2213 VG_(printf)(" show_vts_stats %s\n", caller);
2214 VG_(printf)(" vts_tab size %4lu\n", nTab);
2215 VG_(printf)(" vts_tab live %4lu\n", nLive);
2216 VG_(printf)(" vts_set size %4lu\n", nSet);
2217 VG_(printf)(" total rc %4llu\n", totrc);
2218}
2219
2220/* NOT TO BE CALLED FROM WITHIN libzsm. */
2221static void vts_tab__do_GC ( Bool show_stats )
2222{
2223 UWord i, nTab, nLive, nFreed;
2224
2225 /* check this is actually necessary. */
2226 tl_assert(vts_tab_freelist == VtsID_INVALID);
2227
2228 /* empty the caches for partial order checks and binary joins. We
2229 could do better and prune out the entries to be deleted, but it
2230 ain't worth the hassle. */
2231 VtsID__invalidate_caches();
2232
2233 /* First, make the reference counts up to date. */
2234 zsm_flush_cache();
2235
2236 nTab = VG_(sizeXA)( vts_tab );
2237
2238 if (show_stats) {
2239 VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2240 show_vts_stats("before GC");
2241 }
2242
2243 /* Now we can inspect the entire vts_tab. Any entries
2244 with zero .rc fields are now no longer in use and can be
2245      put back on the free list, removed from vts_set, and deleted. */
2246 nFreed = 0;
2247 for (i = 0; i < nTab; i++) {
2248 Bool present;
2249 UWord oldK = 0, oldV = 0;
2250 VtsTE* te = VG_(indexXA)( vts_tab, i );
2251 if (te->vts == NULL) {
2252 tl_assert(te->rc == 0);
2253 continue; /* already on the free list (presumably) */
2254 }
2255 if (te->rc > 0)
2256 continue; /* in use */
2257 /* Ok, we got one we can free. */
2258 tl_assert(te->vts->id == i);
2259 /* first, remove it from vts_set. */
2260 present = VG_(delFromFM)( vts_set,
2261 &oldK, &oldV, (UWord)te->vts );
2262 tl_assert(present); /* else it isn't in vts_set ?! */
2263 tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2264 tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2265 /* now free the VTS itself */
2266 VTS__delete(te->vts);
2267 te->vts = NULL;
2268 /* and finally put this entry on the free list */
2269 tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2270 add_to_free_list( i );
2271 nFreed++;
2272 }
2273
2274 /* Now figure out when the next GC should be. We'll allow the
2275 number of VTSs to double before GCing again. Except of course
2276 that since we can't (or, at least, don't) shrink vts_tab, we
2277      can't set the threshold value smaller than it. */
2278 tl_assert(nFreed <= nTab);
2279 nLive = nTab - nFreed;
2280 tl_assert(nLive >= 0 && nLive <= nTab);
2281 vts_next_GC_at = 2 * nLive;
2282 if (vts_next_GC_at < nTab)
2283 vts_next_GC_at = nTab;
2284
2285 if (show_stats) {
2286 show_vts_stats("after GC");
2287 VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2288 }
2289
2290   if (VG_(clo_verbosity) > 1) {
2291      static UInt ctr = 0;
2292 tl_assert(nTab > 0);
2293      VG_(message)(Vg_DebugMsg,
2294 "libhb: VTS GC: #%u old size %lu live %lu (%2llu%%)",
2295         ctr++, nTab, nLive, (100ULL * nLive) / nTab);
2296 }
2297}
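/* Worked example of the re-arming logic above: if nTab is 1000 and 700
   entries were freed, nLive is 300, so the next GC would nominally be
   armed at 2 * 300 = 600; but since vts_tab is never shrunk below its
   current size, vts_next_GC_at is raised to 1000 instead. */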
2298
2299
2300/////////////////////////////////////////////////////////
2301// //
2302// Vts IDs //
2303// //
2304/////////////////////////////////////////////////////////
2305
2306//////////////////////////
2307static ULong stats__getOrdering_queries = 0;
2308static ULong stats__getOrdering_misses = 0;
2309static ULong stats__join2_queries = 0;
2310static ULong stats__join2_misses = 0;
2311
2312static inline UInt ROL32 ( UInt w, Int n ) {
2313 w = (w << n) | (w >> (32-n));
2314 return w;
2315}
2316static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
2317 UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
2318 return hash % nTab;
2319}
2320
2321#define N_GETORDERING_CACHE 1023
2322static
2323 struct { VtsID vi1; VtsID vi2; POrd ord; }
2324 getOrdering_cache[N_GETORDERING_CACHE];
2325
2326#define N_JOIN2_CACHE 1023
2327static
2328 struct { VtsID vi1; VtsID vi2; VtsID res; }
2329 join2_cache[N_JOIN2_CACHE];
2330
2331static void VtsID__invalidate_caches ( void ) {
2332 Int i;
2333 for (i = 0; i < N_GETORDERING_CACHE; i++) {
2334 getOrdering_cache[i].vi1 = VtsID_INVALID;
2335 getOrdering_cache[i].vi2 = VtsID_INVALID;
2336 getOrdering_cache[i].ord = 0; /* an invalid POrd value */
2337 }
2338 for (i = 0; i < N_JOIN2_CACHE; i++) {
2339 join2_cache[i].vi1 = VtsID_INVALID;
2340 join2_cache[i].vi2 = VtsID_INVALID;
2341 join2_cache[i].res = VtsID_INVALID;
2342 }
2343}
2344//////////////////////////
2345
2346//static Bool VtsID__is_valid ( VtsID vi ) {
2347// VtsTE* ve;
2348// if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
2349// return False;
2350// ve = VG_(indexXA)( vts_tab, vi );
2351// if (!ve->vts)
2352// return False;
2353// tl_assert(ve->vts->id == vi);
2354// return True;
2355//}
2356
2357static VTS* VtsID__to_VTS ( VtsID vi ) {
2358 VtsTE* te = VG_(indexXA)( vts_tab, vi );
2359 tl_assert(te->vts);
2360 return te->vts;
2361}
2362
2363static void VtsID__pp ( VtsID vi ) {
2364 HChar buf[100];
2365 VTS* vts = VtsID__to_VTS(vi);
2366 VTS__show( buf, sizeof(buf)-1, vts );
2367 buf[sizeof(buf)-1] = 0;
2368 VG_(printf)("%s", buf);
2369}
2370
2371/* compute partial ordering relation of vi1 and vi2. */
2372__attribute__((noinline))
2373static POrd VtsID__getOrdering_WRK ( VtsID vi1, VtsID vi2 ) {
2374 UInt hash;
2375 POrd ord;
2376 VTS *v1, *v2;
2377 //if (vi1 == vi2) return POrd_EQ;
2378 tl_assert(vi1 != vi2);
2379 ////++
2380 stats__getOrdering_queries++;
2381 hash = hash_VtsIDs(vi1, vi2, N_GETORDERING_CACHE);
2382 if (getOrdering_cache[hash].vi1 == vi1
2383 && getOrdering_cache[hash].vi2 == vi2)
2384 return getOrdering_cache[hash].ord;
2385 stats__getOrdering_misses++;
2386 ////--
2387 v1 = VtsID__to_VTS(vi1);
2388 v2 = VtsID__to_VTS(vi2);
2389 ord = VTS__cmp( v1, v2 );
2390 ////++
2391 getOrdering_cache[hash].vi1 = vi1;
2392 getOrdering_cache[hash].vi2 = vi2;
2393 getOrdering_cache[hash].ord = ord;
2394 ////--
2395 return ord;
2396}
2397static inline POrd VtsID__getOrdering ( VtsID vi1, VtsID vi2 ) {
2398 return vi1 == vi2 ? POrd_EQ : VtsID__getOrdering_WRK(vi1, vi2);
2399}
2400
2401/* compute binary join */
2402__attribute__((noinline))
2403static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
2404 UInt hash;
2405 VtsID res;
2406 VTS *vts1, *vts2, *nyu;
2407 //if (vi1 == vi2) return vi1;
2408 tl_assert(vi1 != vi2);
2409 ////++
2410 stats__join2_queries++;
2411 hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
2412 if (join2_cache[hash].vi1 == vi1
2413 && join2_cache[hash].vi2 == vi2)
2414 return join2_cache[hash].res;
2415 stats__join2_misses++;
2416 ////--
2417 vts1 = VtsID__to_VTS(vi1);
2418 vts2 = VtsID__to_VTS(vi2);
2419 nyu = VTS__join(vts1,vts2);
2420 res = vts_tab__find_and_dealloc__or_add(nyu);
2421 ////++
2422 join2_cache[hash].vi1 = vi1;
2423 join2_cache[hash].vi2 = vi2;
2424 join2_cache[hash].res = res;
2425 ////--
2426 return res;
2427}
2428static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
2429 return vi1 == vi2 ? vi1 : VtsID__join2_WRK(vi1, vi2);
2430}
2431
2432/* create a singleton VTS, namely [thr:1] */
2433static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
2434 VTS* nyu = VTS__singleton(thr,tym);
2435 return vts_tab__find_and_dealloc__or_add(nyu);
2436}
2437
2438/* tick operation, creates value 1 if specified index is absent */
2439static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
2440 VTS* vts = VtsID__to_VTS(vi);
2441 VTS* nyu = VTS__tick(idx,vts);
2442 return vts_tab__find_and_dealloc__or_add(nyu);
2443}
2444
2445/* index into a VTS (only for assertions) */
2446static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
2447 VTS* vts = VtsID__to_VTS(vi);
2448 return VTS__indexAt_SLOW( vts, idx );
2449}
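/* An illustrative sketch of how the VtsID-level wrappers above
   combine.  Because every result is interned via
   vts_tab__find_and_dealloc__or_add, equal vector clocks always map to
   equal VtsIDs.  't1' and 't2' are any two distinct valid Thr* values.
   Kept inside #if 0, so it is never compiled. */
#if 0
static void show_vtsid_algebra ( Thr* t1, Thr* t2 )
{
   VtsID a = VtsID__mk_Singleton( t1, 1 );  /* [t1:1]             */
   VtsID b = VtsID__tick( a, t2 );          /* [t1:1, t2:1]       */
   VtsID j = VtsID__join2( a, b );          /* == b, since a <= b */
   tl_assert( j == b );
   tl_assert( VtsID__getOrdering( a, b ) == POrd_LT );
   tl_assert( VtsID__indexAt( b, t2 ) == 1 );
}
#endif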
2450
2451
2452/////////////////////////////////////////////////////////
2453// //
2454// Threads //
2455// //
2456/////////////////////////////////////////////////////////
2457
2458struct _Thr {
2459 /* Current VTSs for this thread. They change as we go along. viR
2460 is the VTS to be used for reads, viW for writes. Usually they
2461 are the same, but can differ when we deal with reader-writer
2462 locks. It is always the case that VtsID__getOrdering(viW,viR)
2463 == POrd_LT or POrdEQ -- that is, viW must be the same, or
2464      == POrd_LT or POrd_EQ -- that is, viW must be the same as, or
2465 VtsID viR;
2466 VtsID viW;
2467 /* opaque (to us) data we hold on behalf of the library's user. */
2468 void* opaque;
2469};
2470
2471static Thr* Thr__new ( void ) {
2472 Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
2473 thr->viR = VtsID_INVALID;
2474 thr->viW = VtsID_INVALID;
2475 return thr;
2476}
2477
2478
2479/////////////////////////////////////////////////////////
2480// //
2481// Shadow Values //
2482// //
2483/////////////////////////////////////////////////////////
2484
2485// type SVal, SVal_INVALID and SVal_NOACCESS are defined by
2486// hb_zsm.h. We have to do everything else here.
2487
2488/* SVal is 64 bit unsigned int.
2489
2490 <---------30---------> <---------30--------->
2491 00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X C(Rmin,Wmin)
2492 01 X--------------------X XX X--------------------X E(rror)
2493 10 X--------------------X XX X--------------------X A: SVal_NOACCESS
2494 11 X--------------------X XX X--------------------X I: SVal_INVALID
2495*/
2496#define SVAL_TAGMASK (3ULL << 62)
2497
2498static inline Bool SVal__isC ( SVal s ) {
2499 return (0ULL << 62) == (s & SVAL_TAGMASK);
2500}
2501static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
2502 //tl_assert(VtsID__is_valid(rmini));
2503 //tl_assert(VtsID__is_valid(wmini));
2504 return (((ULong)rmini) << 32) | ((ULong)wmini);
2505}
2506static inline VtsID SVal__unC_Rmin ( SVal s ) {
2507 tl_assert(SVal__isC(s));
2508 return (VtsID)(s >> 32);
2509}
2510static inline VtsID SVal__unC_Wmin ( SVal s ) {
2511 tl_assert(SVal__isC(s));
2512 return (VtsID)(s & 0xFFFFFFFFULL);
2513}
2514
2515static Bool SVal__isE ( SVal s ) {
2516 return (1ULL << 62) == (s & SVAL_TAGMASK);
2517}
2518static SVal SVal__mkE ( void ) {
2519 return 1ULL << 62;
2520}
2521
2522static Bool SVal__isA ( SVal s ) {
2523 return (2ULL << 62) == (s & SVAL_TAGMASK);
2524}
2525static SVal SVal__mkA ( void ) {
2526 return 2ULL << 62;
2527}
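/* A sketch of round-tripping the 'C' encoding above; 'rmini' and
   'wmini' are placeholder VtsIDs, and the round trip assumes rmini
   fits in the 30-bit field so that the tag bits stay 00.  Kept inside
   #if 0, so it is never compiled. */
#if 0
   SVal sv = SVal__mkC( rmini, wmini );
   tl_assert( SVal__isC(sv) && !SVal__isA(sv) && !SVal__isE(sv) );
   tl_assert( SVal__unC_Rmin(sv) == rmini );
   tl_assert( SVal__unC_Wmin(sv) == wmini );
#endif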
2528
2529/* Direct callback from lib_zsm. */
2530static void SVal__rcinc ( SVal s ) {
2531 if (SVal__isC(s)) {
2532 VtsID__rcinc( SVal__unC_Rmin(s) );
2533 VtsID__rcinc( SVal__unC_Wmin(s) );
2534 }
2535}
2536
2537/* Direct callback from lib_zsm. */
2538static void SVal__rcdec ( SVal s ) {
2539 if (SVal__isC(s)) {
2540 VtsID__rcdec( SVal__unC_Rmin(s) );
2541 VtsID__rcdec( SVal__unC_Wmin(s) );
2542 }
2543}
2544
2545
2546/////////////////////////////////////////////////////////
2547// //
2548// Change-event map2 //
2549// //
2550/////////////////////////////////////////////////////////
2551
2552#define EVENT_MAP_GC_AT (1 * 1000 * 1000)
2553#define EVENT_MAP_GC_DISCARD_FRACTION 0.5
2554
2555/* This is in two parts:
2556
2557 1. An OSet of RCECs. This is a set of reference-counted stack
2558 traces. When the reference count of a stack trace becomes zero,
2559 it is removed from the set and freed up. The intent is to have
2560 a set of stack traces which can be referred to from (2), but to
2561 only represent each one once. The set is indexed/searched by
2562 ordering on the stack trace vectors.
2563
2564 2. An OSet of OldRefs. These store information about each old ref
2565 that we need to record. It is indexed by address of the
2566 location for which the information is recorded. For LRU
2567 purposes, each OldRef also contains a generation number,
2568 indicating when it was most recently accessed.
2569
2570 The important part of an OldRef is, however, its accs[] array.
2571      This is an array of N_OLDREF_ACCS (Thr, RCEC) pairs.  This
2572 allows us to collect the last access-traceback by up to
2573 N_OLDREF_ACCS different threads for this location. The accs[]
2574      array is a move-to-front (MTF) array.  If a pair falls off the end, that's too
2575 bad -- we will lose info about that thread's access to this
2576 location.
2577
2578 When this OSet becomes too big, we can throw away the entries
2579 whose generation numbers are below some threshold; hence doing
2580 approximate LRU discarding. For each discarded OldRef we must
2581      of course decrement the reference count on all the RCECs it
2582 refers to, in order that entries from (1) eventually get
2583 discarded too.
2584*/
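/* A sketch of how parts (1) and (2) are tied together when a single
   access is recorded; 'thr' is a placeholder, and the elided steps are
   spelled out in event_map_bind below.  Kept inside #if 0, so it is
   never compiled. */
#if 0
   RCEC* ctx = get_RCEC( thr );  /* intern the stack trace: part (1)  */
   ctxt__rcinc( ctx );           /* an OldRef is about to refer to it */
   /* ... then find or create the OldRef for the accessed address
      (part (2)) and store the pair (thr, ctx) in its accs[] array,
      rcdec'ing whatever RCEC pointer gets overwritten or pushed off
      the end ... */
#endif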
2585
2586
2587static UWord stats__ctxt_rcdec1 = 0;
2588static UWord stats__ctxt_rcdec2 = 0;
2589static UWord stats__ctxt_rcdec3 = 0;
2590static UWord stats__ctxt_rcdec_calls = 0;
2591static UWord stats__ctxt_rcdec_discards = 0;
2592static UWord stats__ctxt_rcdec1_eq = 0;
2593
2594static UWord stats__ctxt_tab_curr = 0;
2595static UWord stats__ctxt_tab_max = 0;
2596
2597static UWord stats__ctxt_tab_qs = 0;
2598static UWord stats__ctxt_tab_cmps = 0;
2599
2600
2601///////////////////////////////////////////////////////
2602//// Part (1): An OSet of RCECs
2603///
2604
2605#define N_FRAMES 8
2606
2607// (UInt) `echo "Reference Counted Execution Context" | md5sum`
2608#define RCEC_MAGIC 0xab88abb2UL
2609
2610//#define N_RCEC_TAB 98317 /* prime */
2611#define N_RCEC_TAB 196613 /* prime */
2612
2613typedef
2614 struct _RCEC {
2615 struct _RCEC* next;
2616 UWord magic;
2617 UWord rc;
2618 UWord rcX; /* used for crosschecking */
2619 UWord frames[1 + N_FRAMES]; /* first word is hash of all the rest */
2620 }
2621 RCEC;
2622
2623static RCEC** contextTab = NULL; /* hash table of RCEC*s */
2624
2625
2626/* Gives an arbitrary total order on RCEC .frames fields */
2627static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
2628 Word i;
2629 tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
2630 tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
2631 if (ec1->frames[0] < ec2->frames[0]) return -1;
2632 if (ec1->frames[0] > ec2->frames[0]) return 1;
2633 for (i = 1; i < 1 + N_FRAMES; i++) {
2634 if (ec1->frames[i] < ec2->frames[i]) return -1;
2635 if (ec1->frames[i] > ec2->frames[i]) return 1;
2636 }
2637 return 0;
2638}
2639
2640
2641/* Dec the ref of this RCEC. */
2642static void ctxt__rcdec ( RCEC* ec )
2643{
2644 stats__ctxt_rcdec_calls++;
2645 tl_assert(ec && ec->magic == RCEC_MAGIC);
2646 tl_assert(ec->rc > 0);
2647 ec->rc--;
2648}
2649
2650static void ctxt__rcinc ( RCEC* ec )
2651{
2652 tl_assert(ec && ec->magic == RCEC_MAGIC);
2653 ec->rc++;
2654}
2655
2656
2657/* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
2658   move it one step closer to the front of the list, so as to make
2659 subsequent searches for it cheaper. */
2660static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
2661{
2662 RCEC *ec0, *ec1, *ec2;
2663 if (ec == *headp)
2664 tl_assert(0); /* already at head of list */
2665 tl_assert(ec != NULL);
2666 ec0 = *headp;
2667 ec1 = NULL;
2668 ec2 = NULL;
2669 while (True) {
2670 if (ec0 == NULL || ec0 == ec) break;
2671 ec2 = ec1;
2672 ec1 = ec0;
2673 ec0 = ec0->next;
2674 }
2675 tl_assert(ec0 == ec);
2676 if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
2677 RCEC* tmp;
2678 /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
2679 predecessor. Swap ec0 and ec1, that is, move ec0 one step
2680 closer to the start of the list. */
2681 tl_assert(ec2->next == ec1);
2682 tl_assert(ec1->next == ec0);
2683 tmp = ec0->next;
2684 ec2->next = ec0;
2685 ec0->next = ec1;
2686 ec1->next = tmp;
2687 }
2688 else
2689 if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
2690 /* it's second in the list. */
2691 tl_assert(*headp == ec1);
2692 tl_assert(ec1->next == ec0);
2693 ec1->next = ec0->next;
2694 ec0->next = ec1;
2695 *headp = ec0;
2696 }
2697}
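/* Example: given a hash chain A -> B -> C -> D and ec == C, the
   function above rewires the chain to A -> C -> B -> D, so a
   frequently-hit RCEC gradually migrates towards the head. */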
2698
2699
2700/* Find the given RCEC in the tree, and return a pointer to it. Or,
2701 if not present, add the given one to the tree (by making a copy of
2702 it, so the caller can immediately deallocate the original) and
2703 return a pointer to the copy. The caller can safely have 'example'
2704 on its stack, since we will always return a pointer to a copy of
2705 it, not to the original. Note that the inserted node will have .rc
2706   of zero and so the caller must immediately increment it. */
2707__attribute__((noinline))
2708static RCEC* ctxt__find_or_add ( RCEC* example )
2709{
2710 UWord hent;
2711 RCEC* copy;
2712 tl_assert(example && example->magic == RCEC_MAGIC);
2713 tl_assert(example->rc == 0);
2714
2715 /* Search the hash table to see if we already have it. */
2716 stats__ctxt_tab_qs++;
2717 hent = example->frames[0] % N_RCEC_TAB;
2718 copy = contextTab[hent];
2719 while (1) {
2720 if (!copy) break;
2721 tl_assert(copy->magic == RCEC_MAGIC);
2722 stats__ctxt_tab_cmps++;
2723 if (0 == RCEC__cmp_by_frames(copy, example)) break;
2724 copy = copy->next;
2725 }
2726
2727 if (copy) {
2728 tl_assert(copy != example);
2729 /* optimisation: if it's not at the head of its list, move 1
2730 step fwds, to make future searches cheaper */
2731 if (copy != contextTab[hent]) {
2732 move_RCEC_one_step_forward( &contextTab[hent], copy );
2733 }
2734 } else {
2735 copy = HG_(zalloc)( "libhb.cfoa.1", sizeof(RCEC) );
2736 tl_assert(copy != example);
2737 *copy = *example;
2738 copy->next = contextTab[hent];
2739 contextTab[hent] = copy;
2740 stats__ctxt_tab_curr++;
2741 if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
2742 stats__ctxt_tab_max = stats__ctxt_tab_curr;
2743 }
2744 return copy;
2745}
2746
2747static inline UWord ROLW ( UWord w, Int n )
2748{
2749 Int bpw = 8 * sizeof(UWord);
2750 w = (w << n) | (w >> (bpw-n));
2751 return w;
2752}
2753
2754__attribute__((noinline))
2755static RCEC* get_RCEC ( Thr* thr )
2756{
2757 UWord hash, i;
2758 RCEC example;
2759 example.magic = RCEC_MAGIC;
2760 example.rc = 0;
2761 example.rcX = 0;
2762 main_get_stacktrace( thr, &example.frames[1], N_FRAMES );
2763 hash = 0;
2764 for (i = 1; i < 1 + N_FRAMES; i++) {
2765 hash ^= example.frames[i];
2766 hash = ROLW(hash, 19);
2767 }
2768 example.frames[0] = hash;
2769 return ctxt__find_or_add( &example );
2770}
2771
2772///////////////////////////////////////////////////////
2773//// Part (2): An OSet of OldRefs, that refer to (1)
2774///
2775
2776// (UInt) `echo "Old Reference Information" | md5sum`
2777#define OldRef_MAGIC 0x30b1f075UL
2778
2779typedef struct { Thr* thr; RCEC* rcec; } Thr_n_RCEC;
2780
2781#define N_OLDREF_ACCS 3
2782
2783typedef
2784 struct {
2785 Addr ea;
2786 UWord magic;
2787 UWord gen; /* when most recently accessed */
2788 /* unused slots in this array have .thr == NULL */
2789 Thr_n_RCEC accs[N_OLDREF_ACCS];
2790 }
2791 OldRef;
2792
2793static OSet* oldrefTree = NULL; /* OSet* of OldRef */
2794static UWord oldrefGen = 0; /* current LRU generation # */
2795static UWord oldrefTreeN = 0; /* # elems in oldrefTree */
2796static UWord oldrefGenIncAt = 0; /* inc gen # when size hits this */
2797
2798static void event_map_bind ( Addr a, Thr* thr )
2799{
2800 OldRef key, *ref;
2801 RCEC* here;
2802 Word i, j;
2803
2804 key.ea = a;
2805 key.magic = OldRef_MAGIC;
2806
2807 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2808
2809 if (ref) {
2810
2811 /* We already have a record for this address. We now need to
2812 see if we have a stack trace pertaining to this thread's
2813 access. */
2814 tl_assert(ref->magic == OldRef_MAGIC);
2815
2816 tl_assert(thr);
2817 for (i = 0; i < N_OLDREF_ACCS; i++) {
2818 if (ref->accs[i].thr == thr)
2819 break;
2820 }
2821
2822 if (i < N_OLDREF_ACCS) {
2823 /* thread 'thr' has an entry at index 'i'. Update it. */
2824 if (i > 0) {
2825 Thr_n_RCEC tmp = ref->accs[i-1];
2826 ref->accs[i-1] = ref->accs[i];
2827 ref->accs[i] = tmp;
2828 i--;
2829 }
2830 here = get_RCEC( thr );
2831 if (here == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
2832 ctxt__rcinc( here );
2833 stats__ctxt_rcdec1++;
2834 ctxt__rcdec( ref->accs[i].rcec );
2835 ref->accs[i].rcec = here;
2836 tl_assert(ref->accs[i].thr == thr);
2837 } else {
2838 here = get_RCEC( thr );
2839 ctxt__rcinc( here );
2840 /* No entry for this thread. Shuffle all of them down one
2841 slot, and put the new entry at the start of the array. */
2842 if (ref->accs[N_OLDREF_ACCS-1].thr) {
2843 /* the last slot is in use. We must dec the rc on the
2844 associated rcec. */
2845 tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
2846 stats__ctxt_rcdec2++;
2847 ctxt__rcdec(ref->accs[N_OLDREF_ACCS-1].rcec);
2848 } else {
2849 tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
2850 }
2851 for (j = N_OLDREF_ACCS-1; j >= 1; j--)
2852 ref->accs[j] = ref->accs[j-1];
2853 ref->accs[0].thr = thr;
2854 ref->accs[0].rcec = here;
2855 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2856 so we can't add a NULL thr. */
2857 }
2858
2859 ref->gen = oldrefGen;
2860 tl_assert(ref->ea == a);
2861
2862 } else {
2863
2864 /* We don't have a record for this address. Create a new one. */
2865 if (oldrefTreeN >= oldrefGenIncAt) {
2866 oldrefGen++;
2867 oldrefGenIncAt = oldrefTreeN + 50000;
2868 if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
2869 oldrefGen, oldrefTreeN );
2870 }
2871 here = get_RCEC( thr );
2872 ctxt__rcinc(here);
2873 ref = VG_(OSetGen_AllocNode)( oldrefTree, sizeof(OldRef) );
2874 ref->magic = OldRef_MAGIC;
2875 ref->gen = oldrefGen;
2876 ref->ea = a;
2877 ref->accs[0].rcec = here;
2878 ref->accs[0].thr = thr;
2879 tl_assert(thr); /* thr==NULL is used to signify an empty slot,
2880 so we can't add a NULL thr. */
2881 for (j = 1; j < N_OLDREF_ACCS; j++) {
2882 ref->accs[j].thr = NULL;
2883 ref->accs[j].rcec = NULL;
2884 }
2885 VG_(OSetGen_Insert)( oldrefTree, ref );
2886 oldrefTreeN++;
2887
2888 }
2889}
2890
2891
2892static
2893Bool event_map_lookup ( /*OUT*/ExeContext** resEC,
2894                        /*OUT*/Thr** resThr,
2895 Thr* thr_acc, Addr a )
2896{
2897 Word i;
2898 OldRef key, *ref;
2899
2900 tl_assert(thr_acc);
2901
2902 key.ea = a;
2903 key.magic = OldRef_MAGIC;
2904
2905 ref = VG_(OSetGen_Lookup)( oldrefTree, &key );
2906 if (ref) {
2907 tl_assert(ref->magic == OldRef_MAGIC);
2908 tl_assert(ref->accs[0].thr); /* first slot must always be used */
2909
2910 for (i = 0; i < N_OLDREF_ACCS; i++) {
2911 if (ref->accs[i].thr != NULL
2912 && ref->accs[i].thr != thr_acc)
2913 break;
2914 }
2915 /* If we didn't find an entry for some thread other than
2916         thr_acc, just return the entry in slot 0.  It'll look
2917 pretty stupid to the user though. */
2918 if (i == N_OLDREF_ACCS)
2919 i = 0;
2920
2921 tl_assert(i >= 0 && i < N_OLDREF_ACCS);
2922 tl_assert(ref->accs[i].thr);
2923 tl_assert(ref->accs[i].rcec);
2924 tl_assert(ref->accs[i].rcec->magic == RCEC_MAGIC);
2925
2926      *resEC = VG_(make_ExeContext_from_StackTrace)(
2927 &ref->accs[i].rcec->frames[1], N_FRAMES
2928 );
2929      *resThr = ref->accs[i].thr;
2930 return True;
2931 } else {
2932 return False;
2933 }
2934}
2935
2936static void event_map_init ( void )
2937{
2938 Word i;
2939 tl_assert(!contextTab);
2940 contextTab = HG_(zalloc)( "libhb.event_map_init.1 (context table)",
2941 N_RCEC_TAB * sizeof(RCEC*) );
2942 tl_assert(contextTab);
2943 for (i = 0; i < N_RCEC_TAB; i++)
2944 contextTab[i] = NULL;
2945
2946 tl_assert(!oldrefTree);
2947 tl_assert(offsetof(OldRef,ea) == 0); /* prereq for unboxed cmps */
2948 oldrefTree = VG_(OSetGen_Create)(
2949 offsetof(OldRef,ea), /* == 0 */
2950 NULL, /* use unboxed cmp on OldRefs */
2951 HG_(zalloc), "libhb.event_map_init.2 (oldref tree)",
2952 HG_(free)
2953 );
2954 tl_assert(oldrefTree);
2955
2956 oldrefGen = 0;
2957 oldrefGenIncAt = 0;
2958 oldrefTreeN = 0;
2959}
2960
2961static void event_map__check_reference_counts ( Bool before )
2962{
2963 RCEC* rcec;
2964 OldRef* oldref;
2965 Word i;
2966 UWord nEnts = 0;
2967
2968 /* Set the 'check' reference counts to zero. Also, optionally
2969 check that the real reference counts are non-zero. We allow
2970 these to fall to zero before a GC, but the GC must get rid of
2971 all those that are zero, hence none should be zero after a
2972 GC. */
2973 for (i = 0; i < N_RCEC_TAB; i++) {
2974 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
2975 nEnts++;
2976 tl_assert(rcec);
2977 tl_assert(rcec->magic == RCEC_MAGIC);
2978 if (!before)
2979 tl_assert(rcec->rc > 0);
2980 rcec->rcX = 0;
2981 }
2982 }
2983
2984 /* check that the stats are sane */
2985 tl_assert(nEnts == stats__ctxt_tab_curr);
2986 tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
2987
2988 /* visit all the referencing points, inc check ref counts */
2989 VG_(OSetGen_ResetIter)( oldrefTree );
2990 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
2991 tl_assert(oldref->magic == OldRef_MAGIC);
2992 for (i = 0; i < N_OLDREF_ACCS; i++) {
2993 if (oldref->accs[i].thr) {
2994 tl_assert(oldref->accs[i].rcec);
2995 tl_assert(oldref->accs[i].rcec->magic == RCEC_MAGIC);
2996 oldref->accs[i].rcec->rcX++;
2997 } else {
2998 tl_assert(!oldref->accs[i].rcec);
2999 }
3000 }
3001 }
3002
3003 /* compare check ref counts with actual */
3004 for (i = 0; i < N_RCEC_TAB; i++) {
3005 for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
3006 tl_assert(rcec->rc == rcec->rcX);
3007 }
3008 }
3009}
3010
3011static void event_map_maybe_GC ( void )
3012{
3013 OldRef* oldref;
3014 UWord keyW, valW, retained, maxGen;
3015 WordFM* genMap;
3016 XArray* refs2del;
3017 Word i, j, n2del;
3018
3019 if (LIKELY(oldrefTreeN < EVENT_MAP_GC_AT))
3020 return;
3021
3022 if (0)
3023 VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
3024
3025 /* Check our counting is sane */
3026 tl_assert(oldrefTreeN == (UWord) VG_(OSetGen_Size)( oldrefTree ));
3027
3028 /* Check the reference counts */
3029 event_map__check_reference_counts( True/*before*/ );
3030
3031 /* Compute the distribution of generation values in the ref tree */
3032 /* genMap :: generation-number -> count-of-nodes-with-that-number */
3033 genMap = VG_(newFM)( HG_(zalloc), "libhb.emmG.1",
3034 HG_(free), NULL );
3035
3036 VG_(OSetGen_ResetIter)( oldrefTree );
3037 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3038 UWord key = oldref->gen;
3039 keyW = valW = 0;
3040 if (VG_(lookupFM)(genMap, &keyW, &valW, key )) {
3041 tl_assert(keyW == key);
3042 tl_assert(valW > 0);
3043 }
3044 /* now valW is the old count for generation 'key' */
3045 VG_(addToFM)(genMap, key, valW+1);
3046 }
3047
3048 tl_assert(VG_(sizeFM)(genMap) > 0);
3049
3050 retained = oldrefTreeN;
3051 maxGen = 0;
3052 VG_(initIterFM)( genMap );
3053 while (VG_(nextIterFM)( genMap, &keyW, &valW )) {
3054 tl_assert(keyW > 0); /* can't allow a generation # 0 */
3055 if (0) VG_(printf)(" XXX: gen %lu has %lu\n", keyW, valW );
3056 tl_assert(keyW >= maxGen);
3057 tl_assert(retained >= valW);
3058 if (retained - valW
3059 > (UWord)(EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION)) {
3060 retained -= valW;
3061 maxGen = keyW;
3062 } else {
3063 break;
3064 }
3065 }
3066 VG_(doneIterFM)( genMap );
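   /* Summary of the loop above: generations are considered
      oldest-first, and a generation is marked for discarding only
      while doing so still leaves more than
      EVENT_MAP_GC_AT * EVENT_MAP_GC_DISCARD_FRACTION (~500000)
      entries retained; 'maxGen' ends up as the newest generation that
      will be discarded. */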
3067
3068   VG_(deleteFM)( genMap, NULL, NULL );
3069
3070   tl_assert(retained >= 0 && retained <= oldrefTreeN);
3071
3072 /* Now make up a big list of the oldrefTree entries we want to
3073 delete. We can't simultaneously traverse the tree and delete
3074 stuff from it, so first we need to copy them off somewhere
3075 else. (sigh) */
3076 refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.1",
3077 HG_(free), sizeof(OldRef*) );
3078
3079   if (retained < oldrefTreeN) {
3080
3081 /* This is the normal (expected) case. We discard any ref whose
3082 generation number <= maxGen. */
3083 VG_(OSetGen_ResetIter)( oldrefTree );
3084 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3085 tl_assert(oldref->magic == OldRef_MAGIC);
3086 if (oldref->gen <= maxGen) {
3087 VG_(addToXA)( refs2del, &oldref );
3088 }
3089      }
3090      if (VG_(clo_verbosity) > 1) {
3091 VG_(message)(Vg_DebugMsg,
3092 "libhb: EvM GC: delete generations %lu and below, "
3093 "retaining %lu entries",
3094 maxGen, retained );
3095 }
3096
3097 } else {
3098
3099 static UInt rand_seed = 0; /* leave as static */
3100
3101 /* Degenerate case: there's only one generation in the entire
3102 tree, so we need to have some other way of deciding which
3103 refs to throw away. Just throw out half of them randomly. */
3104 tl_assert(retained == oldrefTreeN);
3105 VG_(OSetGen_ResetIter)( oldrefTree );
3106 while ( (oldref = VG_(OSetGen_Next)( oldrefTree )) ) {
3107 UInt n;
3108 tl_assert(oldref->magic == OldRef_MAGIC);
3109 n = VG_(random)( &rand_seed );
3110 if ((n & 0xFFF) < 0x800) {
3111 VG_(addToXA)( refs2del, &oldref );
3112 retained--;
3113 }
3114 }
3115 if (VG_(clo_verbosity) > 1) {
3116 VG_(message)(Vg_DebugMsg,
3117 "libhb: EvM GC: randomly delete half the entries, "
3118 "retaining %lu entries",
3119 retained );
3120 }
3121
3122   }
3123
3124 n2del = VG_(sizeXA)( refs2del );
3125 tl_assert(n2del == (Word)(oldrefTreeN - retained));
3126
3127 if (0) VG_(printf)("%s","deleting entries\n");
3128 for (i = 0; i < n2del; i++) {
3129 void* nd;
3130 OldRef* ref = *(OldRef**)VG_(indexXA)( refs2del, i );
3131 tl_assert(ref);
3132 tl_assert(ref->magic == OldRef_MAGIC);
3133 for (j = 0; j < N_OLDREF_ACCS; j++) {
3134 if (ref->accs[j].rcec) {
3135 tl_assert(ref->accs[j].thr);
3136 stats__ctxt_rcdec3++;
3137 ctxt__rcdec( ref->accs[j].rcec );
3138 } else {
3139 tl_assert(!ref->accs[j].thr);
3140 }
3141 }
3142 nd = VG_(OSetGen_Remove)( oldrefTree, ref );
3143 VG_(OSetGen_FreeNode)( oldrefTree, nd );
3144 }
3145
3146 VG_(deleteXA)( refs2del );
3147
3148 tl_assert( VG_(OSetGen_Size)( oldrefTree ) == retained );
3149
3150 oldrefTreeN = retained;
3151 oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
3152
3153 /* Throw away all RCECs with zero reference counts */
3154 for (i = 0; i < N_RCEC_TAB; i++) {
3155 RCEC** pp = &contextTab[i];
3156 RCEC* p = *pp;
3157 while (p) {
3158 if (p->rc == 0) {
3159 *pp = p->next;
3160 HG_(free)(p);
3161 p = *pp;
3162 tl_assert(stats__ctxt_tab_curr > 0);
3163 stats__ctxt_tab_curr--;
3164 } else {
3165 pp = &p->next;
3166 p = p->next;
3167 }
3168 }
3169 }
3170
3171 /* Check the reference counts */
3172 event_map__check_reference_counts( False/*after*/ );
3173
3174 //if (0)
3175 //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
3176 // VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
3177
3178}
3179
3180
3181/////////////////////////////////////////////////////////
3182// //
3183// Core MSM //
3184// //
3185/////////////////////////////////////////////////////////
3186
3187/* Logic in msm_read/msm_write updated/verified after re-analysis,
3188 19 Nov 08. */
3189
3190#define MSM_CONFACC 1
3191
3192#define MSM_CHECK 0
3193
3194/* 19 Nov 08: it seems that MSM_RACE2ERR == 1 is a bad idea. When
3195 nonzero, the effect is that when a race is detected for a location,
3196 that location is put into a special 'error' state and no further
3197 checking of it is done until it returns to a 'normal' state, which
3198 requires it to be deallocated and reallocated.
3199
3200 This is a bad idea, because of the interaction with suppressions.
3201 Suppose there is a race on the location, but the error is
3202 suppressed. The location now is marked as in-error. Now any
3203 subsequent race -- including ones we want to see -- will never be
3204 detected until the location is deallocated and reallocated.
3205
3206   Hence set MSM_RACE2ERR to zero.  This causes raced-on locations to
3207 remain in the normal 'C' (constrained) state, but places on them
3208 the constraint that the next accesses happen-after both the
3209 existing constraint and the relevant vector clock of the thread
3210 doing the racing access.
3211*/
3212#define MSM_RACE2ERR 0
3213
3214static ULong stats__msm_read = 0;
3215static ULong stats__msm_read_change = 0;
3216static ULong stats__msm_write = 0;
3217static ULong stats__msm_write_change = 0;
3218
3219__attribute__((noinline))
3220static void record_race_info ( Thr* acc_thr,
3221 Addr acc_addr, SizeT szB, Bool isWrite,
3222 SVal svOld, SVal svNew )
3223{
3224 Bool found;
3225 Thr* thrp = NULL;
3226   ExeContext* where = NULL;
3227 ExeContext* wherep = NULL;
3228   where = main_get_EC( acc_thr );
3229 found = event_map_lookup( &wherep, &thrp, acc_thr, acc_addr );
3230 if (found) {
3231 tl_assert(wherep);
3232 tl_assert(thrp);
3233 tl_assert(thrp->opaque);
3234 tl_assert(acc_thr->opaque);
3235 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3236 isWrite, szB, NULL/*mb_lastlock*/,
3237 wherep, thrp->opaque );
3238 } else {
3239 tl_assert(!wherep);
3240 tl_assert(!thrp);
3241 tl_assert(acc_thr->opaque);
3242 HG_(record_error_Race)( acc_thr->opaque, acc_addr,
3243 isWrite, szB, NULL/*mb_lastlock*/,
3244 NULL, NULL );
3245 }
3246}
3247
3248static Bool is_sane_SVal_C ( SVal sv ) {
3249 POrd ord;
3250 if (!SVal__isC(sv)) return True;
3251 ord = VtsID__getOrdering( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
3252 if (ord == POrd_EQ || ord == POrd_LT) return True;
3253 return False;
3254}
3255
3256
3257/* Compute new state following a read */
3258static inline SVal msm_read ( SVal svOld,
3259 /* The following are only needed for
3260 creating error reports. */
3261 Thr* acc_thr,
3262 Addr acc_addr, SizeT szB )
3263{
3264 SVal svNew = SVal_INVALID;
3265 stats__msm_read++;
3266
3267 /* Redundant sanity check on the constraints */
3268 if (MSM_CHECK) {
3269 tl_assert(is_sane_SVal_C(svOld));
3270 }
3271
3272 if (SVal__isC(svOld)) {
3273 POrd ord;
3274 VtsID tviR = acc_thr->viR;
3275 VtsID tviW = acc_thr->viW;
3276 VtsID rmini = SVal__unC_Rmin(svOld);
3277 VtsID wmini = SVal__unC_Wmin(svOld);
3278
3279 ord = VtsID__getOrdering(rmini,tviR);
3280 if (ord == POrd_EQ || ord == POrd_LT) {
3281 /* no race */
3282 /* Note: RWLOCK subtlety: use tviW, not tviR */
3283 svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
3284 goto out;
3285 } else {
3286         /* assert on sanity of constraints. */
3287 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3288 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
3289         svNew = MSM_RACE2ERR
3290 ? SVal__mkE()
3291                   : SVal__mkC( VtsID__join2(wmini,tviR),
3292 VtsID__join2(wmini,tviW) );
3293         record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
3294 svOld, svNew );
3295 goto out;
3296 }
3297 }
3298 if (SVal__isA(svOld)) {
3299 /* reading no-access memory (sigh); leave unchanged */
3300 /* check for no pollution */
3301 tl_assert(svOld == SVal_NOACCESS);
3302 svNew = SVal_NOACCESS;
3303 goto out;
3304 }
3305 if (SVal__isE(svOld)) {
3306 /* no race, location is already "in error" */
3307 svNew = SVal__mkE();
3308 goto out;
3309 }
3310 VG_(printf)("msm_read: bad svOld: 0x%016llx\n", svOld);
3311 tl_assert(0);
3312
3313 out:
3314 if (MSM_CHECK) {
3315 tl_assert(is_sane_SVal_C(svNew));
3316 }
3317 tl_assert(svNew != SVal_INVALID);
3318 if (svNew != svOld) {
3319 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3320 event_map_bind( acc_addr, acc_thr );
3321 stats__msm_read_change++;
3322 }
3323 }
3324 return svNew;
3325}
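/* Worked example of the read rule above: suppose the location's
   constraint is C(R,W) and thread t reads it.  If R <= t.viR (the
   constraint's read-minimum happens-before the reading thread's read
   clock), there is no race and the constraint becomes
   C(R, join(W, t.viW)).  Otherwise a race is reported and, since
   MSM_RACE2ERR == 0, the constraint becomes
   C(join(W, t.viR), join(W, t.viW)), so that later accesses are also
   checked against the racing access. */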
3326
3327
3328/* Compute new state following a write */
3329static inline SVal msm_write ( SVal svOld,
3330 /* The following are only needed for
3331 creating error reports. */
3332 Thr* acc_thr,
3333 Addr acc_addr, SizeT szB )
3334{
3335 SVal svNew = SVal_INVALID;
3336 stats__msm_write++;
3337
3338 /* Redundant sanity check on the constraints */
3339 if (MSM_CHECK) {
3340 tl_assert(is_sane_SVal_C(svOld));
3341 }
3342
3343 if (SVal__isC(svOld)) {
3344 POrd ord;
3345 VtsID tviW = acc_thr->viW;
3346 VtsID wmini = SVal__unC_Wmin(svOld);
3347
3348 ord = VtsID__getOrdering(wmini,tviW);
3349 if (ord == POrd_EQ || ord == POrd_LT) {
3350 /* no race */
3351 svNew = SVal__mkC( tviW, tviW );
3352 goto out;
3353 } else {
3354         VtsID tviR  = acc_thr->viR;
3355         VtsID rmini = SVal__unC_Rmin(svOld);
3356         /* assert on sanity of constraints. */
3357 POrd ordxx = VtsID__getOrdering(rmini,wmini);
3358 tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT);
3359         svNew = MSM_RACE2ERR
3360 ? SVal__mkE()
3361                   : SVal__mkC( VtsID__join2(wmini,tviR),
3362                                VtsID__join2(wmini,tviW) );
3363 record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
3364 svOld, svNew );
3365 goto out;
3366 }
3367 }
3368 if (SVal__isA(svOld)) {
3369 /* writing no-access memory (sigh); leave unchanged */
3370 /* check for no pollution */
3371 tl_assert(svOld == SVal_NOACCESS);
3372 svNew = SVal_NOACCESS;
3373 goto out;
3374 }
3375 if (SVal__isE(svOld)) {
3376 /* no race, location is already "in error" */
3377 svNew = SVal__mkE();
3378 goto out;
3379 }
3380 VG_(printf)("msm_write: bad svOld: 0x%016llx\n", svOld);
3381 tl_assert(0);
3382
3383 out:
3384 if (MSM_CHECK) {
3385 tl_assert(is_sane_SVal_C(svNew));
3386 }
3387 tl_assert(svNew != SVal_INVALID);
3388 if (svNew != svOld) {
3389 if (MSM_CONFACC && SVal__isC(svOld) && SVal__isC(svNew)) {
3390 event_map_bind( acc_addr, acc_thr );
3391 stats__msm_write_change++;
3392 }
3393 }
3394 return svNew;
3395}
3396
3397
3398/////////////////////////////////////////////////////////
3399// //
3400// Apply core MSM to specific memory locations //
3401// //
3402/////////////////////////////////////////////////////////
3403
3404/*------------- ZSM accesses: 8 bit apply ------------- */
3405
3406void zsm_apply8___msm_read ( Thr* thr, Addr a ) {
3407 CacheLine* cl;
3408 UWord cloff, tno, toff;
3409 SVal svOld, svNew;
3410 UShort descr;
3411 stats__cline_read8s++;
3412 cl = get_cacheline(a);
3413 cloff = get_cacheline_offset(a);
3414 tno = get_treeno(a);
3415 toff = get_tree_offset(a); /* == 0 .. 7 */
3416 descr = cl->descrs[tno];
3417 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3418 SVal* tree = &cl->svals[tno << 3];
3419 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3420 if (SCE_CACHELINE)
3421 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3422 }
3423 svOld = cl->svals[cloff];
3424 svNew = msm_read( svOld, thr,a,1 );
3425 tl_assert(svNew != SVal_INVALID);
3426 cl->svals[cloff] = svNew;
3427}
3428
3429void zsm_apply8___msm_write ( Thr* thr, Addr a ) {
3430 CacheLine* cl;
3431 UWord cloff, tno, toff;
3432 SVal svOld, svNew;
3433 UShort descr;
3434 stats__cline_read8s++;
3435 cl = get_cacheline(a);
3436 cloff = get_cacheline_offset(a);
3437 tno = get_treeno(a);
3438 toff = get_tree_offset(a); /* == 0 .. 7 */
3439 descr = cl->descrs[tno];
3440 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3441 SVal* tree = &cl->svals[tno << 3];
3442 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3443 if (SCE_CACHELINE)
3444 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3445 }
3446 svOld = cl->svals[cloff];
3447 svNew = msm_write( svOld, thr,a,1 );
3448 tl_assert(svNew != SVal_INVALID);
3449 cl->svals[cloff] = svNew;
3450}
3451
3452/*------------- ZSM accesses: 16 bit apply ------------- */
3453
3454void zsm_apply16___msm_read ( Thr* thr, Addr a ) {
3455 CacheLine* cl;
3456 UWord cloff, tno, toff;
3457 SVal svOld, svNew;
3458 UShort descr;
3459 stats__cline_read16s++;
3460 if (UNLIKELY(!aligned16(a))) goto slowcase;
3461 cl = get_cacheline(a);
3462 cloff = get_cacheline_offset(a);
3463 tno = get_treeno(a);
3464 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3465 descr = cl->descrs[tno];
3466 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3467 if (valid_value_is_below_me_16(descr, toff)) {
3468 goto slowcase;
3469 } else {
3470 SVal* tree = &cl->svals[tno << 3];
3471 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3472 }
3473 if (SCE_CACHELINE)
3474 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3475 }
3476 svOld = cl->svals[cloff];
3477 svNew = msm_read( svOld, thr,a,2 );
3478 tl_assert(svNew != SVal_INVALID);
3479 cl->svals[cloff] = svNew;
3480 return;
3481 slowcase: /* misaligned, or must go further down the tree */
3482 stats__cline_16to8splits++;
3483 zsm_apply8___msm_read( thr, a + 0 );
3484 zsm_apply8___msm_read( thr, a + 1 );
3485}
3486
3487void zsm_apply16___msm_write ( Thr* thr, Addr a ) {
3488 CacheLine* cl;
3489 UWord cloff, tno, toff;
3490 SVal svOld, svNew;
3491 UShort descr;
3492 stats__cline_read16s++;
3493 if (UNLIKELY(!aligned16(a))) goto slowcase;
3494 cl = get_cacheline(a);
3495 cloff = get_cacheline_offset(a);
3496 tno = get_treeno(a);
3497 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3498 descr = cl->descrs[tno];
3499 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3500 if (valid_value_is_below_me_16(descr, toff)) {
3501 goto slowcase;
3502 } else {
3503 SVal* tree = &cl->svals[tno << 3];
3504 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3505 }
3506 if (SCE_CACHELINE)
3507 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3508 }
3509 svOld = cl->svals[cloff];
3510 svNew = msm_write( svOld, thr,a,2 );
3511 tl_assert(svNew != SVal_INVALID);
3512 cl->svals[cloff] = svNew;
3513 return;
3514 slowcase: /* misaligned, or must go further down the tree */
3515 stats__cline_16to8splits++;
3516 zsm_apply8___msm_write( thr, a + 0 );
3517 zsm_apply8___msm_write( thr, a + 1 );
3518}
3519
3520/*------------- ZSM accesses: 32 bit apply ------------- */
3521
3522void zsm_apply32___msm_read ( Thr* thr, Addr a ) {
3523 CacheLine* cl;
3524 UWord cloff, tno, toff;
3525 SVal svOld, svNew;
3526 UShort descr;
3527 if (UNLIKELY(!aligned32(a))) goto slowcase;
3528 cl = get_cacheline(a);
3529 cloff = get_cacheline_offset(a);
3530 tno = get_treeno(a);
3531 toff = get_tree_offset(a); /* == 0 or 4 */
3532 descr = cl->descrs[tno];
3533 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3534 if (valid_value_is_above_me_32(descr, toff)) {
3535 SVal* tree = &cl->svals[tno << 3];
3536 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3537 } else {
3538 goto slowcase;
3539 }
3540 if (SCE_CACHELINE)
3541 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3542 }
3543 svOld = cl->svals[cloff];
3544 svNew = msm_read( svOld, thr,a,4 );
3545 tl_assert(svNew != SVal_INVALID);
3546 cl->svals[cloff] = svNew;
3547 return;
3548 slowcase: /* misaligned, or must go further down the tree */
3549 stats__cline_32to16splits++;
3550 zsm_apply16___msm_read( thr, a + 0 );
3551 zsm_apply16___msm_read( thr, a + 2 );
3552}
3553
3554void zsm_apply32___msm_write ( Thr* thr, Addr a ) {
3555 CacheLine* cl;
3556 UWord cloff, tno, toff;
3557 SVal svOld, svNew;
3558 UShort descr;
3559 if (UNLIKELY(!aligned32(a))) goto slowcase;
3560 cl = get_cacheline(a);
3561 cloff = get_cacheline_offset(a);
3562 tno = get_treeno(a);
3563 toff = get_tree_offset(a); /* == 0 or 4 */
3564 descr = cl->descrs[tno];
3565 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3566 if (valid_value_is_above_me_32(descr, toff)) {
3567 SVal* tree = &cl->svals[tno << 3];
3568 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3569 } else {
3570 goto slowcase;
3571 }
3572 if (SCE_CACHELINE)
3573 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3574 }
3575 svOld = cl->svals[cloff];
3576 svNew = msm_write( svOld, thr,a,4 );
3577 tl_assert(svNew != SVal_INVALID);
3578 cl->svals[cloff] = svNew;
3579 return;
3580 slowcase: /* misaligned, or must go further down the tree */
3581 stats__cline_32to16splits++;
3582 zsm_apply16___msm_write( thr, a + 0 );
3583 zsm_apply16___msm_write( thr, a + 2 );
3584}
3585
3586/*------------- ZSM accesses: 64 bit apply ------------- */
3587
3588void zsm_apply64___msm_read ( Thr* thr, Addr a ) {
3589 CacheLine* cl;
3590 UWord cloff, tno, toff;
3591 SVal svOld, svNew;
3592 UShort descr;
3593 stats__cline_read64s++;
3594 if (UNLIKELY(!aligned64(a))) goto slowcase;
3595 cl = get_cacheline(a);
3596 cloff = get_cacheline_offset(a);
3597 tno = get_treeno(a);
3598 toff = get_tree_offset(a); /* == 0, unused */
3599 descr = cl->descrs[tno];
3600 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3601 goto slowcase;
3602 }
3603 svOld = cl->svals[cloff];
3604 svNew = msm_read( svOld, thr,a,8 );
3605 tl_assert(svNew != SVal_INVALID);
3606 cl->svals[cloff] = svNew;
3607 return;
3608 slowcase: /* misaligned, or must go further down the tree */
3609 stats__cline_64to32splits++;
3610 zsm_apply32___msm_read( thr, a + 0 );
3611 zsm_apply32___msm_read( thr, a + 4 );
3612}
3613
3614void zsm_apply64___msm_write ( Thr* thr, Addr a ) {
3615 CacheLine* cl;
3616 UWord cloff, tno, toff;
3617 SVal svOld, svNew;
3618 UShort descr;
3619 stats__cline_write64s++;
3620 if (UNLIKELY(!aligned64(a))) goto slowcase;
3621 cl = get_cacheline(a);
3622 cloff = get_cacheline_offset(a);
3623 tno = get_treeno(a);
3624 toff = get_tree_offset(a); /* == 0, unused */
3625 descr = cl->descrs[tno];
3626 if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
3627 goto slowcase;
3628 }
3629 svOld = cl->svals[cloff];
3630 svNew = msm_write( svOld, thr,a,8 );
3631 tl_assert(svNew != SVal_INVALID);
3632 cl->svals[cloff] = svNew;
3633 return;
3634 slowcase: /* misaligned, or must go further down the tree */
3635 stats__cline_64to32splits++;
3636 zsm_apply32___msm_write( thr, a + 0 );
3637 zsm_apply32___msm_write( thr, a + 4 );
3638}
3639
3640/*--------------- ZSM accesses: 8 bit write --------------- */
3641
3642static
3643void zsm_write8 ( Addr a, SVal svNew ) {
3644 CacheLine* cl;
3645 UWord cloff, tno, toff;
3646 UShort descr;
3647 stats__cline_set8s++;
3648 cl = get_cacheline(a);
3649 cloff = get_cacheline_offset(a);
3650 tno = get_treeno(a);
3651 toff = get_tree_offset(a); /* == 0 .. 7 */
3652 descr = cl->descrs[tno];
3653 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3654 SVal* tree = &cl->svals[tno << 3];
3655 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3656 if (SCE_CACHELINE)
3657 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3658 }
3659 tl_assert(svNew != SVal_INVALID);
3660 cl->svals[cloff] = svNew;
3661}
3662
3663/*--------------- ZSM accesses: 16 bit write --------------- */
3664
3665static
3666void zsm_write16 ( Addr a, SVal svNew ) {
3667 CacheLine* cl;
3668 UWord cloff, tno, toff;
3669 UShort descr;
3670 stats__cline_set16s++;
3671 if (UNLIKELY(!aligned16(a))) goto slowcase;
3672 cl = get_cacheline(a);
3673 cloff = get_cacheline_offset(a);
3674 tno = get_treeno(a);
3675 toff = get_tree_offset(a); /* == 0, 2, 4 or 6 */
3676 descr = cl->descrs[tno];
3677 if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
3678 if (valid_value_is_below_me_16(descr, toff)) {
3679 /* Writing at this level. Need to fix up 'descr'. */
3680 cl->descrs[tno] = pullup_descr_to_16(descr, toff);
3681 /* At this point, the tree does not match cl->descrs[tno] any
3682 more. The assignments below will fix it up. */
3683 } else {
3684 /* We can't indiscriminately write on the w16 node as in the
3685 w64 case, as that might make the node inconsistent with
3686 its parent. So first, pull down to this level. */
3687 SVal* tree = &cl->svals[tno << 3];
3688 cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
3689 if (SCE_CACHELINE)
3690 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3691 }
3692 }
3693 tl_assert(svNew != SVal_INVALID);
3694 cl->svals[cloff + 0] = svNew;
3695 cl->svals[cloff + 1] = SVal_INVALID;
3696 return;
3697 slowcase: /* misaligned */
3698 stats__cline_16to8splits++;
3699 zsm_write8( a + 0, svNew );
3700 zsm_write8( a + 1, svNew );
3701}
3702
3703/*--------------- ZSM accesses: 32 bit write --------------- */
3704
3705static
3706void zsm_write32 ( Addr a, SVal svNew ) {
3707 CacheLine* cl;
3708 UWord cloff, tno, toff;
3709 UShort descr;
3710 stats__cline_set32s++;
3711 if (UNLIKELY(!aligned32(a))) goto slowcase;
3712 cl = get_cacheline(a);
3713 cloff = get_cacheline_offset(a);
3714 tno = get_treeno(a);
3715 toff = get_tree_offset(a); /* == 0 or 4 */
3716 descr = cl->descrs[tno];
3717 if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
3718 if (valid_value_is_above_me_32(descr, toff)) {
3719 /* We can't indiscriminately write on the w32 node as in the
3720 w64 case, as that might make the node inconsistent with
3721 its parent. So first, pull down to this level. */
3722 SVal* tree = &cl->svals[tno << 3];
3723 cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
3724 if (SCE_CACHELINE)
3725 tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
3726 } else {
3727 /* Writing at this level. Need to fix up 'descr'. */
3728 cl->descrs[tno] = pullup_descr_to_32(descr, toff);
3729 /* At this point, the tree does not match cl->descrs[tno] any
3730 more. The assignments below will fix it up. */
3731 }
3732 }
3733 tl_assert(svNew != SVal_INVALID);
3734 cl->svals[cloff + 0] = svNew;
3735 cl->svals[cloff + 1] = SVal_INVALID;
3736 cl->svals[cloff + 2] = SVal_INVALID;
3737 cl->svals[cloff + 3] = SVal_INVALID;
3738 return;
3739 slowcase: /* misaligned */
3740 stats__cline_32to16splits++;
3741 zsm_write16( a + 0, svNew );
3742 zsm_write16( a + 2, svNew );
3743}
3744
3745/*--------------- ZSM accesses: 64 bit write --------------- */
3746
3747static
3748void zsm_write64 ( Addr a, SVal svNew ) {
3749 CacheLine* cl;
3750 UWord cloff, tno, toff;
3751 stats__cline_set64s++;
3752 if (UNLIKELY(!aligned64(a))) goto slowcase;
3753 cl = get_cacheline(a);
3754 cloff = get_cacheline_offset(a);
3755 tno = get_treeno(a);
3756 toff = get_tree_offset(a); /* == 0 */
3757 cl->descrs[tno] = TREE_DESCR_64;
3758 tl_assert(svNew != SVal_INVALID);
3759 cl->svals[cloff + 0] = svNew;
3760 cl->svals[cloff + 1] = SVal_INVALID;
3761 cl->svals[cloff + 2] = SVal_INVALID;
3762 cl->svals[cloff + 3] = SVal_INVALID;
3763 cl->svals[cloff + 4] = SVal_INVALID;
3764 cl->svals[cloff + 5] = SVal_INVALID;
3765 cl->svals[cloff + 6] = SVal_INVALID;
3766 cl->svals[cloff + 7] = SVal_INVALID;
3767 return;
3768 slowcase: /* misaligned */
3769 stats__cline_64to32splits++;
3770 zsm_write32( a + 0, svNew );
3771 zsm_write32( a + 4, svNew );
3772}
3773
3774/*------------- ZSM accesses: 8 bit read/copy ------------- */
3775
3776static
3777SVal zsm_read8 ( Addr a ) {
3778 CacheLine* cl;
3779 UWord cloff, tno, toff;
3780 UShort descr;
3781 stats__cline_get8s++;
3782 cl = get_cacheline(a);
3783 cloff = get_cacheline_offset(a);
3784 tno = get_treeno(a);
3785 toff = get_tree_offset(a); /* == 0 .. 7 */
3786 descr = cl->descrs[tno];
3787 if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
3788 SVal* tree = &cl->svals[tno << 3];
3789 cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
3790 }
3791 return cl->svals[cloff];
3792}
3793
3794static void zsm_copy8 ( Addr src, Addr dst, Bool uu_normalise ) {
3795 SVal sv;
3796 stats__cline_copy8s++;
3797 sv = zsm_read8( src );
3798 zsm_write8( dst, sv );
3799}
3800
3801/* ------------ Shadow memory range setting ops ------------ */
3802
3803void zsm_apply_range___msm_read ( Thr* thr,
3804 Addr a, SizeT len )
3805{
3806 /* fast track a couple of common cases */
3807 if (len == 4 && aligned32(a)) {
3808 zsm_apply32___msm_read( thr, a );
3809 return;
3810 }
3811 if (len == 8 && aligned64(a)) {
3812 zsm_apply64___msm_read( thr, a );
3813 return;
3814 }
3815
3816 /* be completely general (but as efficient as possible) */
3817 if (len == 0) return;
3818
3819 if (!aligned16(a) && len >= 1) {
3820 zsm_apply8___msm_read( thr, a );
3821 a += 1;
3822 len -= 1;
3823 tl_assert(aligned16(a));
3824 }
3825 if (len == 0) return;
3826
3827 if (!aligned32(a) && len >= 2) {
3828 zsm_apply16___msm_read( thr, a );
3829 a += 2;
3830 len -= 2;
3831 tl_assert(aligned32(a));
3832 }
3833 if (len == 0) return;
3834
3835 if (!aligned64(a) && len >= 4) {
3836 zsm_apply32___msm_read( thr, a );
3837 a += 4;
3838 len -= 4;
3839 tl_assert(aligned64(a));
3840 }
3841 if (len == 0) return;
3842
3843 if (len >= 8) {
3844 tl_assert(aligned64(a));
3845 while (len >= 8) {
3846 zsm_apply64___msm_read( thr, a );
3847 a += 8;
3848 len -= 8;
3849 }
3850 tl_assert(aligned64(a));
3851 }
3852 if (len == 0) return;
3853
3854 if (len >= 4)
3855 tl_assert(aligned32(a));
3856 if (len >= 4) {
3857 zsm_apply32___msm_read( thr, a );
3858 a += 4;
3859 len -= 4;
3860 }
3861 if (len == 0) return;
3862
3863 if (len >= 2)
3864 tl_assert(aligned16(a));
3865 if (len >= 2) {
3866 zsm_apply16___msm_read( thr, a );
3867 a += 2;
3868 len -= 2;
3869 }
3870 if (len == 0) return;
3871
3872 if (len >= 1) {
3873 zsm_apply8___msm_read( thr, a );
3874 a += 1;
3875 len -= 1;
3876 }
3877 tl_assert(len == 0);
3878}
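/* As a concrete illustration of the cascade above: a read of length 13
   starting at an address with a % 8 == 3 decomposes as
      1 byte   (a is odd; afterwards a % 8 == 4, len == 12)
      (the 2-byte step is skipped, since a is now 4-aligned)
      4 bytes  (afterwards a % 8 == 0, len == 8)
      8 bytes  (one 64-bit apply, len == 0)
   so the bulk of any long range is handled by the 64-bit path.
   zsm_apply_range___msm_write below follows exactly the same pattern. */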
3879
3880
3881
3882void zsm_apply_range___msm_write ( Thr* thr,
3883 Addr a, SizeT len )
3884{
3885 /* fast track a couple of common cases */
3886 if (len == 4 && aligned32(a)) {
3887 zsm_apply32___msm_write( thr, a );
3888 return;
3889 }
3890 if (len == 8 && aligned64(a)) {
3891 zsm_apply64___msm_write( thr, a );
3892 return;
3893 }
3894
3895 /* be completely general (but as efficient as possible) */
3896 if (len == 0) return;
3897
3898 if (!aligned16(a) && len >= 1) {
3899 zsm_apply8___msm_write( thr, a );
3900 a += 1;
3901 len -= 1;
3902 tl_assert(aligned16(a));
3903 }
3904 if (len == 0) return;
3905
3906 if (!aligned32(a) && len >= 2) {
3907 zsm_apply16___msm_write( thr, a );
3908 a += 2;
3909 len -= 2;
3910 tl_assert(aligned32(a));
3911 }
3912 if (len == 0) return;
3913
3914 if (!aligned64(a) && len >= 4) {
3915 zsm_apply32___msm_write( thr, a );
3916 a += 4;
3917 len -= 4;
3918 tl_assert(aligned64(a));
3919 }
3920 if (len == 0) return;
3921
3922 if (len >= 8) {
3923 tl_assert(aligned64(a));
3924 while (len >= 8) {
3925 zsm_apply64___msm_write( thr, a );
3926 a += 8;
3927 len -= 8;
3928 }
3929 tl_assert(aligned64(a));
3930 }
3931 if (len == 0) return;
3932
3933 if (len >= 4)
3934 tl_assert(aligned32(a));
3935 if (len >= 4) {
3936 zsm_apply32___msm_write( thr, a );
3937 a += 4;
3938 len -= 4;
3939 }
3940 if (len == 0) return;
3941
3942 if (len >= 2)
3943 tl_assert(aligned16(a));
3944 if (len >= 2) {
3945 zsm_apply16___msm_write( thr, a );
3946 a += 2;
3947 len -= 2;
3948 }
3949 if (len == 0) return;
3950
3951 if (len >= 1) {
3952 zsm_apply8___msm_write( thr, a );
3953 a += 1;
3954 len -= 1;
3955 }
3956 tl_assert(len == 0);
3957}
3958
3959
3960
3961
3962/* Block-copy states (needed for implementing realloc()). */
3963
3964static void zsm_copy_range ( Addr src, Addr dst, SizeT len )
3965{
3966 SizeT i;
3967 if (len == 0)
3968 return;
3969
3970 /* assert for non-overlappingness */
3971 tl_assert(src+len <= dst || dst+len <= src);
3972
3973 /* To be simple, just copy byte by byte. But so as not to wreck
3974 performance for later accesses to dst[0 .. len-1], normalise
3975 destination lines as we finish with them, and also normalise the
3976 line containing the first and last address. */
3977 for (i = 0; i < len; i++) {
3978 Bool normalise
3979 = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
3980 || i == 0 /* first in range */
3981 || i == len-1; /* last in range */
3982 zsm_copy8( src+i, dst+i, normalise );
3983 }
3984}
3985
3986
3987/* For setting address ranges to a given value. Has considerable
3988 sophistication so as to avoid generating large numbers of pointless
3989 cache loads/writebacks for large ranges. */
3990
3991/* Do small ranges in-cache, in the obvious way. */
3992static
3993void zsm_set_range_SMALL ( Addr a, SizeT len, SVal svNew )
3994{
3995 /* fast track a couple of common cases */
3996 if (len == 4 && aligned32(a)) {
3997 zsm_write32( a, svNew );
3998 return;
3999 }
4000 if (len == 8 && aligned64(a)) {
4001 zsm_write64( a, svNew );
4002 return;
4003 }
4004
4005 /* be completely general (but as efficient as possible) */
4006 if (len == 0) return;
4007
4008 if (!aligned16(a) && len >= 1) {
4009 zsm_write8( a, svNew );
4010 a += 1;
4011 len -= 1;
4012 tl_assert(aligned16(a));
4013 }
4014 if (len == 0) return;
4015
4016 if (!aligned32(a) && len >= 2) {
4017 zsm_write16( a, svNew );
4018 a += 2;
4019 len -= 2;
4020 tl_assert(aligned32(a));
4021 }
4022 if (len == 0) return;
4023
4024 if (!aligned64(a) && len >= 4) {
4025 zsm_write32( a, svNew );
4026 a += 4;
4027 len -= 4;
4028 tl_assert(aligned64(a));
4029 }
4030 if (len == 0) return;
4031
4032 if (len >= 8) {
4033 tl_assert(aligned64(a));
4034 while (len >= 8) {
4035 zsm_write64( a, svNew );
4036 a += 8;
4037 len -= 8;
4038 }
4039 tl_assert(aligned64(a));
4040 }
4041 if (len == 0) return;
4042
4043 if (len >= 4)
4044 tl_assert(aligned32(a));
4045 if (len >= 4) {
4046 zsm_write32( a, svNew );
4047 a += 4;
4048 len -= 4;
4049 }
4050 if (len == 0) return;
4051
4052 if (len >= 2)
4053 tl_assert(aligned16(a));
4054 if (len >= 2) {
4055 zsm_write16( a, svNew );
4056 a += 2;
4057 len -= 2;
4058 }
4059 if (len == 0) return;
4060
4061 if (len >= 1) {
4062 zsm_write8( a, svNew );
4063 a += 1;
4064 len -= 1;
4065 }
4066 tl_assert(len == 0);
4067}
4068
4069
4070/* If we're doing a small range, hand off to zsm_set_range_SMALL. But
4071 for larger ranges, try to operate directly on the out-of-cache
4072 representation, rather than dragging lines into the cache,
4073 overwriting them, and forcing them out. This turns out to be an
4074 important performance optimisation. */
4075
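/* To make the splitting below concrete (writing L for N_LINE_ARANGE):
   a range starting at a == L+5 with len == 3*L+10 decomposes as
      before:  [L+5,  2*L)     length L-5   (partial first line)
      aligned: [2*L,  4*L)     length 2*L   (two whole lines)
      after:   [4*L,  4*L+15)  length 15    (partial last line)
   The two partial pieces go through zsm_set_range_SMALL.  Each whole
   line in the middle is either overwritten in-cache with zsm_write64
   (if it happens to be resident already) or has its LineZ dictionary
   rewritten in place, so it is never pointlessly fetched and written
   back. */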
4076static void zsm_set_range ( Addr a, SizeT len, SVal svNew )
4077{
4078 tl_assert(svNew != SVal_INVALID);
4079 stats__cache_make_New_arange += (ULong)len;
4080
4081 if (0 && len > 500)
4082 VG_(printf)("make New ( %#lx, %ld )\n", a, len );
4083
4084 if (0) {
4085 static UWord n_New_in_cache = 0;
4086 static UWord n_New_not_in_cache = 0;
4087 /* tag is 'a' with the in-line offset masked out,
4088 eg a[31]..a[4] 0000 */
4089 Addr tag = a & ~(N_LINE_ARANGE - 1);
4090 UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
4091 if (LIKELY(tag == cache_shmem.tags0[wix])) {
4092 n_New_in_cache++;
4093 } else {
4094 n_New_not_in_cache++;
4095 }
4096 if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
4097 VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
4098 n_New_in_cache, n_New_not_in_cache );
4099 }
4100
4101 if (LIKELY(len < 2 * N_LINE_ARANGE)) {
4102 zsm_set_range_SMALL( a, len, svNew );
4103 } else {
4104 Addr before_start = a;
4105 Addr aligned_start = cacheline_ROUNDUP(a);
4106 Addr after_start = cacheline_ROUNDDN(a + len);
4107 UWord before_len = aligned_start - before_start;
4108 UWord aligned_len = after_start - aligned_start;
4109 UWord after_len = a + len - after_start;
4110 tl_assert(before_start <= aligned_start);
4111 tl_assert(aligned_start <= after_start);
4112 tl_assert(before_len < N_LINE_ARANGE);
4113 tl_assert(after_len < N_LINE_ARANGE);
4114 tl_assert(get_cacheline_offset(aligned_start) == 0);
4115 if (get_cacheline_offset(a) == 0) {
4116 tl_assert(before_len == 0);
4117 tl_assert(a == aligned_start);
4118 }
4119 if (get_cacheline_offset(a+len) == 0) {
4120 tl_assert(after_len == 0);
4121 tl_assert(after_start == a+len);
4122 }
4123 if (before_len > 0) {
4124 zsm_set_range_SMALL( before_start, before_len, svNew );
4125 }
4126 if (after_len > 0) {
4127 zsm_set_range_SMALL( after_start, after_len, svNew );
4128 }
4129 stats__cache_make_New_inZrep += (ULong)aligned_len;
4130
4131 while (1) {
4132 Addr tag;
4133 UWord wix;
4134 if (aligned_start >= after_start)
4135 break;
4136 tl_assert(get_cacheline_offset(aligned_start) == 0);
4137 tag = aligned_start & ~(N_LINE_ARANGE - 1);
4138 wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
4139 if (tag == cache_shmem.tags0[wix]) {
4140 UWord i;
4141 for (i = 0; i < N_LINE_ARANGE / 8; i++)
4142 zsm_write64( aligned_start + i * 8, svNew );
4143 } else {
4144 UWord i;
4145 Word zix;
4146 SecMap* sm;
4147 LineZ* lineZ;
4148 /* This line is not in the cache. Do not force it in; instead
4149 modify it in-place. */
4150 /* find the Z line to write in and rcdec it or the
4151 associated F line. */
4152 find_Z_for_writing( &sm, &zix, tag );
4153 tl_assert(sm);
4154 tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
4155 lineZ = &sm->linesZ[zix];
4156 lineZ->dict[0] = svNew;
4157 lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
4158 for (i = 0; i < N_LINE_ARANGE/4; i++)
4159 lineZ->ix2s[i] = 0; /* all refer to dict[0] */
4160 rcinc_LineZ(lineZ);
4161 }
4162 aligned_start += N_LINE_ARANGE;
4163 aligned_len -= N_LINE_ARANGE;
4164 }
4165 tl_assert(aligned_start == after_start);
4166 tl_assert(aligned_len == 0);
4167 }
4168}
4169
4170
4171/////////////////////////////////////////////////////////
4172// //
4173// Synchronisation objects //
4174// //
4175/////////////////////////////////////////////////////////
4176
4177// (UInt) `echo "Synchronisation object" | md5sum`
4178#define SO_MAGIC 0x56b3c5b0U
4179
4180struct _SO {
4181 VtsID viR; /* r-clock of sender */
4182 VtsID viW; /* w-clock of sender */
4183 UInt magic;
4184};
4185
4186static SO* SO__Alloc ( void ) {
4187 SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
4188 so->viR = VtsID_INVALID;
4189 so->viW = VtsID_INVALID;
4190 so->magic = SO_MAGIC;
4191 return so;
4192}
4193static void SO__Dealloc ( SO* so ) {
4194 tl_assert(so);
4195 tl_assert(so->magic == SO_MAGIC);
4196 if (so->viR == VtsID_INVALID) {
4197 tl_assert(so->viW == VtsID_INVALID);
4198 } else {
4199 tl_assert(so->viW != VtsID_INVALID);
4200 VtsID__rcdec(so->viR);
4201 VtsID__rcdec(so->viW);
4202 }
4203 so->magic = 0;
4204 HG_(free)( so );
4205}
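/* Note the lifecycle implied above: a freshly allocated SO is "empty",
   with both clocks VtsID_INVALID, until the first libhb_so_send into
   it; SO__Dealloc then drops whatever references a send installed. */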
4206
4207
4208/////////////////////////////////////////////////////////
4209// //
4210// Top Level API //
4211// //
4212/////////////////////////////////////////////////////////
4213
4214static void show_thread_state ( HChar* str, Thr* t )
4215{
4216 if (1) return;
4217 if (t->viR == t->viW) {
4218 VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
4219 VtsID__pp( t->viR );
4220 VG_(printf)("%s","\n");
4221 } else {
4222 VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
4223 VtsID__pp( t->viR );
4224 VG_(printf)(" viW %u==", t->viW);
4225 VtsID__pp( t->viW );
4226 VG_(printf)("%s","\n");
4227 }
4228}
4229
4230
4231Thr* libhb_init (
4232 void (*get_stacktrace)( Thr*, Addr*, UWord ),
4233 ExeContext* (*get_EC)( Thr* )
4234 )
4235{
4236 Thr* thr;
4237 VtsID vi;
4238 tl_assert(get_stacktrace);
4239 tl_assert(get_EC);
4240 main_get_stacktrace = get_stacktrace;
4241 main_get_EC = get_EC;
4242
4243 // No need to initialise hg_wordfm.
4244 // No need to initialise hg_wordset.
4245
4246 vts_set_init();
4247 vts_tab_init();
4248 event_map_init();
4249 VtsID__invalidate_caches();
4250
4251 // initialise shadow memory
4252 zsm_init( SVal__rcinc, SVal__rcdec );
4253
4254 thr = Thr__new();
4255 vi = VtsID__mk_Singleton( thr, 1 );
4256 thr->viR = vi;
4257 thr->viW = vi;
4258 VtsID__rcinc(thr->viR);
4259 VtsID__rcinc(thr->viW);
4260
4261 show_thread_state(" root", thr);
4262 return thr;
4263}
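/* A minimal sketch of how a client tool might start the library.  The
   callback names (my_get_stacktrace, my_get_EC) are illustrative
   placeholders, not part of libhb:

      static void        my_get_stacktrace ( Thr* thr, Addr* fs, UWord n );
      static ExeContext* my_get_EC         ( Thr* thr );
      ...
      Thr* root = libhb_init( my_get_stacktrace, my_get_EC );

   The returned Thr* stands for the root (main) thread; all further
   threads should be obtained via libhb_create. */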
4264
4265Thr* libhb_create ( Thr* parent )
4266{
4267 /* The child's VTSs are copies of the parent's VTSs, but ticked at
4268 the child's index. Since the child's index is guaranteed
4269 unique, it has never been seen before, so the implicit value
4270 before the tick is zero and after that is one. */
4271 Thr* child = Thr__new();
4272
4273 child->viR = VtsID__tick( parent->viR, child );
4274 child->viW = VtsID__tick( parent->viW, child );
4275 VtsID__rcinc(child->viR);
4276 VtsID__rcinc(child->viW);
4277
4278 tl_assert(VtsID__indexAt( child->viR, child ) == 1);
4279 tl_assert(VtsID__indexAt( child->viW, child ) == 1);
4280
4281 /* and the parent has to move along too */
4282 VtsID__rcdec(parent->viR);
4283 VtsID__rcdec(parent->viW);
4284 parent->viR = VtsID__tick( parent->viR, parent );
4285 parent->viW = VtsID__tick( parent->viW, parent );
4286 VtsID__rcinc(parent->viR);
4287 VtsID__rcinc(parent->viW);
4288
4289 show_thread_state(" child", child);
4290 show_thread_state("parent", parent);
4291
4292 return child;
4293}
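/* For example, if the parent's clocks currently read { parent:3 },
   then after libhb_create the child holds { parent:3, child:1 } and
   the parent has ticked along to { parent:4 }.  The asserts above
   check that the child's own component really is 1. */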
4294
4295/* Shut down the library, and print stats (in fact that's _all_
4296 this is for. */
4297void libhb_shutdown ( Bool show_stats )
4298{
4299 if (show_stats) {
4300 VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
4301 VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
4302 stats__secmaps_allocd,
4303 stats__secmap_ga_space_covered);
4304 VG_(printf)(" linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
4305 stats__secmap_linesZ_allocd,
4306 stats__secmap_linesZ_bytes);
4307 VG_(printf)(" linesF: %'10lu allocd (%'12lu bytes occupied)\n",
4308 stats__secmap_linesF_allocd,
4309 stats__secmap_linesF_bytes);
4310 VG_(printf)(" secmaps: %'10lu iterator steppings\n",
4311 stats__secmap_iterator_steppings);
4312 VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
4313 stats__secmaps_search, stats__secmaps_search_slow);
4314
4315 VG_(printf)("%s","\n");
4316 VG_(printf)(" cache: %'lu totrefs (%'lu misses)\n",
4317 stats__cache_totrefs, stats__cache_totmisses );
4318 VG_(printf)(" cache: %'14lu Z-fetch, %'14lu F-fetch\n",
4319 stats__cache_Z_fetches, stats__cache_F_fetches );
4320 VG_(printf)(" cache: %'14lu Z-wback, %'14lu F-wback\n",
4321 stats__cache_Z_wbacks, stats__cache_F_wbacks );
4322 VG_(printf)(" cache: %'14lu invals, %'14lu flushes\n",
4323 stats__cache_invals, stats__cache_flushes );
4324 VG_(printf)(" cache: %'14llu arange_New %'14llu direct-to-Zreps\n",
4325 stats__cache_make_New_arange,
4326 stats__cache_make_New_inZrep);
4327
4328 VG_(printf)("%s","\n");
4329 VG_(printf)(" cline: %'10lu normalises\n",
4330 stats__cline_normalises );
4331 VG_(printf)(" cline: rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4332 stats__cline_read64s,
4333 stats__cline_read32s,
4334 stats__cline_read16s,
4335 stats__cline_read8s );
4336 VG_(printf)(" cline: wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4337 stats__cline_write64s,
4338 stats__cline_write32s,
4339 stats__cline_write16s,
4340 stats__cline_write8s );
4341 VG_(printf)(" cline: sets 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
4342 stats__cline_set64s,
4343 stats__cline_set32s,
4344 stats__cline_set16s,
4345 stats__cline_set8s );
4346 VG_(printf)(" cline: get1s %'lu, copy1s %'lu\n",
4347 stats__cline_get8s, stats__cline_copy8s );
4348 VG_(printf)(" cline: splits: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4349 stats__cline_64to32splits,
4350 stats__cline_32to16splits,
4351 stats__cline_16to8splits );
4352 VG_(printf)(" cline: pulldowns: 8to4 %'12lu 4to2 %'12lu 2to1 %'12lu\n",
4353 stats__cline_64to32pulldown,
4354 stats__cline_32to16pulldown,
4355 stats__cline_16to8pulldown );
4356 if (0)
4357 VG_(printf)(" cline: sizeof(LineZ) %ld, covers %ld bytes of arange\n",
4358 (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
4359
4360 VG_(printf)("%s","\n");
4361
4362 VG_(printf)(" libhb: %'13llu msm_read (%'llu changed)\n",
4363 stats__msm_read, stats__msm_read_change);
4364 VG_(printf)(" libhb: %'13llu msm_write (%'llu changed)\n",
4365 stats__msm_write, stats__msm_write_change);
4366 VG_(printf)(" libhb: %'13llu getOrd queries (%'llu misses)\n",
4367 stats__getOrdering_queries, stats__getOrdering_misses);
4368 VG_(printf)(" libhb: %'13llu join2 queries (%'llu misses)\n",
4369 stats__join2_queries, stats__join2_misses);
4370
4371 VG_(printf)("%s","\n");
4372 VG_(printf)(
4373 " libhb: %ld entries in vts_table (approximately %lu bytes)\n",
4374 VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
4375 );
4376 VG_(printf)( " libhb: %lu entries in vts_set\n",
4377 VG_(sizeFM)( vts_set ) );
4378
4379 VG_(printf)("%s","\n");
4380 VG_(printf)( " libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
4381 stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
4382 stats__ctxt_rcdec2,
4383 stats__ctxt_rcdec3 );
4384 VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
4385 stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
4386 VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
4387 (UWord)N_RCEC_TAB,
4388 stats__ctxt_tab_curr );
4389 VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
4390 stats__ctxt_tab_qs,
4391 stats__ctxt_tab_cmps );
4392#if 0
4393 VG_(printf)("sizeof(AvlNode) = %lu\n", sizeof(AvlNode));
4394 VG_(printf)("sizeof(WordBag) = %lu\n", sizeof(WordBag));
4395 VG_(printf)("sizeof(MaybeWord) = %lu\n", sizeof(MaybeWord));
4396 VG_(printf)("sizeof(CacheLine) = %lu\n", sizeof(CacheLine));
4397 VG_(printf)("sizeof(LineZ) = %lu\n", sizeof(LineZ));
4398 VG_(printf)("sizeof(LineF) = %lu\n", sizeof(LineF));
4399 VG_(printf)("sizeof(SecMap) = %lu\n", sizeof(SecMap));
4400 VG_(printf)("sizeof(Cache) = %lu\n", sizeof(Cache));
4401 VG_(printf)("sizeof(SMCacheEnt) = %lu\n", sizeof(SMCacheEnt));
4402 VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
4403 VG_(printf)("sizeof(VTS) = %lu\n", sizeof(VTS));
4404 VG_(printf)("sizeof(ScalarTS) = %lu\n", sizeof(ScalarTS));
4405 VG_(printf)("sizeof(VtsTE) = %lu\n", sizeof(VtsTE));
4406 VG_(printf)("sizeof(MSMInfo) = %lu\n", sizeof(MSMInfo));
4407
4408 VG_(printf)("sizeof(struct _XArray) = %lu\n", sizeof(struct _XArray));
4409 VG_(printf)("sizeof(struct _WordFM) = %lu\n", sizeof(struct _WordFM));
4410 VG_(printf)("sizeof(struct _Thr) = %lu\n", sizeof(struct _Thr));
4411 VG_(printf)("sizeof(struct _SO) = %lu\n", sizeof(struct _SO));
4412#endif
4413
4414 VG_(printf)("%s","<<< END libhb stats >>>\n");
4415 VG_(printf)("%s","\n");
4416
4417 }
4418}
4419
4420void libhb_async_exit ( Thr* thr )
4421{
4422 /* is there anything we need to do? */
4423}
4424
4425/* Both Segs and SOs point to VTSs. However, there is no sharing, so
4426 a Seg that points at a VTS is its one-and-only owner, and ditto for
4427 a SO that points at a VTS. */
4428
4429SO* libhb_so_alloc ( void )
4430{
4431 return SO__Alloc();
4432}
4433
4434void libhb_so_dealloc ( SO* so )
4435{
4436 tl_assert(so);
4437 tl_assert(so->magic == SO_MAGIC);
4438 SO__Dealloc(so);
4439}
4440
4441/* See comments in libhb.h for details on the meaning of
4442 strong vs weak sends and strong vs weak receives. */
4443void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
4444{
4445 /* Copy the VTSs from 'thr' into the sync object, and then move
4446 the thread along one step. */
4447
4448 tl_assert(so);
4449 tl_assert(so->magic == SO_MAGIC);
4450
4451 /* stay sane .. a thread's read-clock must always lead or be the
4452 same as its write-clock */
4453 { POrd ord = VtsID__getOrdering(thr->viW, thr->viR);
4454 tl_assert(ord == POrd_EQ || ord == POrd_LT);
4455 }
4456
4457 /* since we're overwriting the VtsIDs in the SO, we need to drop
4458 any references made by the previous contents thereof */
4459 if (so->viR == VtsID_INVALID) {
4460 tl_assert(so->viW == VtsID_INVALID);
4461 so->viR = thr->viR;
4462 so->viW = thr->viW;
4463 VtsID__rcinc(so->viR);
4464 VtsID__rcinc(so->viW);
4465 } else {
4466 /* In a strong send, we dump any previous VC in the SO and
4467 install the sending thread's VC instead. For a weak send we
4468 must join2 with what's already there. */
4469 tl_assert(so->viW != VtsID_INVALID);
4470 VtsID__rcdec(so->viR);
4471 VtsID__rcdec(so->viW);
4472 so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
4473 so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
4474 VtsID__rcinc(so->viR);
4475 VtsID__rcinc(so->viW);
4476 }
4477
4478 /* move both parent clocks along */
4479 VtsID__rcdec(thr->viR);
4480 VtsID__rcdec(thr->viW);
4481 thr->viR = VtsID__tick( thr->viR, thr );
4482 thr->viW = VtsID__tick( thr->viW, thr );
4483 VtsID__rcinc(thr->viR);
4484 VtsID__rcinc(thr->viW);
4485 if (strong_send)
4486 show_thread_state("s-send", thr);
4487 else
4488 show_thread_state("w-send", thr);
4489}
4490
4491void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
4492{
4493 tl_assert(so);
4494 tl_assert(so->magic == SO_MAGIC);
4495
4496 if (so->viR != VtsID_INVALID) {
4497 tl_assert(so->viW != VtsID_INVALID);
4498
4499 /* Weak receive (basically, an R-acquisition of a R-W lock).
4500 This advances the read-clock of the receiver, but not the
4501 write-clock. */
4502 VtsID__rcdec(thr->viR);
4503 thr->viR = VtsID__join2( thr->viR, so->viR );
4504 VtsID__rcinc(thr->viR);
4505
4506 /* For a strong receive, we also advance the receiver's write
4507 clock, which means the receive as a whole is essentially
4508 equivalent to a W-acquisition of a R-W lock. */
4509 if (strong_recv) {
4510 VtsID__rcdec(thr->viW);
4511 thr->viW = VtsID__join2( thr->viW, so->viW );
4512 VtsID__rcinc(thr->viW);
4513 }
4514
4515 if (strong_recv)
4516 show_thread_state("s-recv", thr);
4517 else
4518 show_thread_state("w-recv", thr);
4519
4520 } else {
4521 tl_assert(so->viW == VtsID_INVALID);
4522 /* Deal with degenerate case: 'so' has no vts, so there has been
4523 no message posted to it. Just ignore this case. */
4524 show_thread_state("d-recv", thr);
4525 }
4526}
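/* A rough usage sketch -- one plausible mapping, not a statement of
   what any particular client does: associate an SO with each mutex,
   call libhb_so_send on unlock and libhb_so_recv (with strong_recv)
   on the next lock.  The locking thread then joins in the unlocker's
   clocks, so accesses before the unlock happen-before accesses after
   the lock.  The precise strong/weak choices appropriate for each kind
   of synchronisation object are discussed in libhb.h. */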
4527
4528Bool libhb_so_everSent ( SO* so )
4529{
4530 if (so->viR == VtsID_INVALID) {
4531 tl_assert(so->viW == VtsID_INVALID);
4532 return False;
4533 } else {
4534 tl_assert(so->viW != VtsID_INVALID);
4535 return True;
4536 }
4537}
4538
4539#define XXX1 0 // 0x67a106c
4540#define XXX2 0
4541
4542static Bool TRACEME(Addr a, SizeT szB) {
4543 if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
4544 if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
4545 return False;
4546}
4547static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
4548 SVal sv = zsm_read8(a);
4549 VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
4550 show_thread_state("", thr);
4551 VG_(printf)("%s","\n");
4552}
4553
4554void libhb_range_new ( Thr* thr, Addr a, SizeT szB )
4555{
4556 SVal sv = SVal__mkC(thr->viW, thr->viW);
4557 tl_assert(is_sane_SVal_C(sv));
4558 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
4559 zsm_set_range( a, szB, sv );
4560 if (TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
4561}
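/* Note: the sv installed above pairs the allocating thread's current
   write-clock with itself, so in effect a freshly allocated range is
   treated as having just been written by the allocating thread. */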
4562
4563void libhb_range_noaccess ( Thr* thr, Addr a, SizeT szB )
4564{
4565 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-before");
4566 zsm_set_range( a, szB, SVal__mkA() );
4567 if (TRACEME(a,szB)) trace(thr,a,szB,"NA-after ");
4568}
4569
4570void* libhb_get_Thr_opaque ( Thr* thr ) {
4571 tl_assert(thr);
4572 return thr->opaque;
4573}
4574
4575void libhb_set_Thr_opaque ( Thr* thr, void* v ) {
4576 tl_assert(thr);
4577 thr->opaque = v;
4578}
4579
4580void libhb_copy_shadow_state ( Addr dst, Addr src, SizeT len )
4581{
4582 zsm_copy_range(dst, src, len);
4583}
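/* This is the entry point a client would use when, for example,
   realloc() moves a block: copying the shadow state from the old
   payload to the new one keeps the happens-before history attached to
   the surviving bytes (see zsm_copy_range above). */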
4584
4585void libhb_maybe_GC ( void )
4586{
4587 event_map_maybe_GC();
4588 /* If there are still freelist entries available, no need for a
4589 GC. */
4590 if (vts_tab_freelist != VtsID_INVALID)
4591 return;
4592 /* So all the table entries are full, and we're having to expand
4593 the table. But did we hit the threshhold point yet? */
4594 the table. But did we hit the threshold point yet? */
4595 return;
4596 vts_tab__do_GC( False/*don't show stats*/ );
4597}
4598
4599
4600/////////////////////////////////////////////////////////////////
4601/////////////////////////////////////////////////////////////////
4602// //
4603// SECTION END main library //
4604// //
4605/////////////////////////////////////////////////////////////////
4606/////////////////////////////////////////////////////////////////
4607
4608/*--------------------------------------------------------------------*/
4609/*--- end libhb_main.c ---*/
4610/*--------------------------------------------------------------------*/