blob: f4ed8a17e8b8e1aa94a864a8a385342959858f25 [file] [log] [blame]
//===-- working_set.cpp ---------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners.
//
// This file contains working-set-specific code.
//===----------------------------------------------------------------------===//
14
15#include "working_set.h"
16#include "esan.h"
17#include "esan_flags.h"
18#include "esan_shadow.h"
Derek Bruening07814762016-06-03 16:14:07 +000019#include "esan_sideline.h"
Derek Bruening8ef3f0f2016-05-31 13:41:07 +000020#include "sanitizer_common/sanitizer_procmaps.h"
Derek Bruening88639852016-05-25 02:04:04 +000021
// Shadow layout: each cache line of application memory is tracked by exactly
// one shadow byte.
// - The highest bit of a shadow byte records whether the corresponding cache
//   line has ever been accessed.
// - The lowest bit of a shadow byte records whether the corresponding cache
//   line was accessed since the last sample.
// - The remaining bits are available either for a single working set snapshot
//   between two consecutive samples, or for an aggregate working set snapshot
//   over multiple sample periods (future work).
// Races on individual shadow bytes are tolerated.
typedef unsigned char byte;
32
33namespace __esan {
34
Derek Bruening8ef3f0f2016-05-31 13:41:07 +000035// Our shadow memory assumes that the line size is 64.
36static const u32 CacheLineSize = 64;
37
Derek Bruening88639852016-05-25 02:04:04 +000038// See the shadow byte layout description above.
39static const u32 TotalWorkingSetBitIdx = 7;
40static const u32 CurWorkingSetBitIdx = 0;
41static const byte ShadowAccessedVal =
42 (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);
43
Derek Bruening07814762016-06-03 16:14:07 +000044static SidelineThread Thread;
45// If we use real-time-based timer samples this won't overflow in any realistic
46// scenario, but if we switch to some other unit (such as memory accesses) we
47// may want to consider a 64-bit int.
48static u32 SnapshotNum;
49
Derek Bruening88639852016-05-25 02:04:04 +000050void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
51 bool IsWrite) {
52 if (Size == 0)
53 return;
54 SIZE_T I = 0;
55 uptr LineSize = getFlags()->cache_line_size;
56 // As Addr+Size could overflow at the top of a 32-bit address space,
57 // we avoid the simpler formula that rounds the start and end.
58 SIZE_T NumLines = Size / LineSize +
59 // Add any extra at the start or end adding on an extra line:
60 (LineSize - 1 + Addr % LineSize + Size % LineSize) / LineSize;
61 byte *Shadow = (byte *)appToShadow(Addr);
62 // Write shadow bytes until we're word-aligned.
63 while (I < NumLines && (uptr)Shadow % 4 != 0) {
64 if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
65 *Shadow |= ShadowAccessedVal;
66 ++Shadow;
67 ++I;
68 }
69 // Write whole shadow words at a time.
70 // Using a word-stride loop improves the runtime of a microbenchmark of
71 // memset calls by 10%.
72 u32 WordValue = ShadowAccessedVal | ShadowAccessedVal << 8 |
73 ShadowAccessedVal << 16 | ShadowAccessedVal << 24;
74 while (I + 4 <= NumLines) {
75 if ((*(u32*)Shadow & WordValue) != WordValue)
76 *(u32*)Shadow |= WordValue;
77 Shadow += 4;
78 I += 4;
79 }
80 // Write any trailing shadow bytes.
81 while (I < NumLines) {
82 if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
83 *Shadow |= ShadowAccessedVal;
84 ++Shadow;
85 ++I;
86 }
87}
88
Derek Bruening8ef3f0f2016-05-31 13:41:07 +000089// This routine will word-align ShadowStart and ShadowEnd prior to scanning.
90static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,
91 uptr ShadowEnd) {
92 u32 WorkingSetSize = 0;
93 u32 ByteValue = 0x1 << BitIdx;
94 u32 WordValue = ByteValue | ByteValue << 8 | ByteValue << 16 |
95 ByteValue << 24;
96 // Get word aligned start.
97 ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
98 for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
99 if ((*Ptr & WordValue) != 0) {
100 byte *BytePtr = (byte *)Ptr;
101 for (u32 j = 0; j < sizeof(u32); ++j) {
102 if (BytePtr[j] & ByteValue) {
103 ++WorkingSetSize;
104 // TODO: Accumulate to the lower-frequency bit to the left.
105 }
106 }
107 // Clear this bit from every shadow byte.
108 *Ptr &= ~WordValue;
109 }
110 }
111 return WorkingSetSize;
112}
113
114// Scan shadow memory to calculate the number of cache lines being accessed,
115// i.e., the number of non-zero bits indexed by BitIdx in each shadow byte.
116// We also clear the lowest bits (most recent working set snapshot).
117static u32 computeWorkingSizeAndReset(u32 BitIdx) {
118 u32 WorkingSetSize = 0;
119 MemoryMappingLayout MemIter(true/*cache*/);
120 uptr Start, End, Prot;
121 while (MemIter.Next(&Start, &End, nullptr/*offs*/, nullptr/*file*/,
122 0/*file size*/, &Prot)) {
123 VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n",
124 __FUNCTION__, Start, End, Prot, isAppMem(Start),
125 isShadowMem(Start));
126 if (isShadowMem(Start) && (Prot & MemoryMappingLayout::kProtectionWrite)) {
127 VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Start, End);
128 WorkingSetSize += countAndClearShadowValues(BitIdx, Start, End);
129 }
130 }
131 return WorkingSetSize;
132}
133
Derek Bruening07814762016-06-03 16:14:07 +0000134// This is invoked from a signal handler but in a sideline thread doing nothing
135// else so it is a little less fragile than a typical signal handler.
136static void takeSample(void *Arg) {
137 // FIXME: record the size and report at process end. For now this simply
138 // serves as a test of the sideline thread functionality.
139 VReport(1, "%s: snapshot #%d: %u\n", SanitizerToolName, SnapshotNum,
140 computeWorkingSizeAndReset(CurWorkingSetBitIdx));
141 ++SnapshotNum;
142}
143
Derek Bruening88639852016-05-25 02:04:04 +0000144void initializeWorkingSet() {
Derek Bruening8ef3f0f2016-05-31 13:41:07 +0000145 CHECK(getFlags()->cache_line_size == CacheLineSize);
Derek Bruening8e74c102016-05-31 13:21:03 +0000146 registerMemoryFaultHandler();
Derek Bruening07814762016-06-03 16:14:07 +0000147
148 if (getFlags()->record_snapshots)
149 Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
Derek Bruening88639852016-05-25 02:04:04 +0000150}
151
Derek Bruening8ef3f0f2016-05-31 13:41:07 +0000152static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
153 // We need a constant to avoid software divide support:
154 static const u32 KilobyteCachelines = (0x1 << 10) / CacheLineSize;
155 static const u32 MegabyteCachelines = KilobyteCachelines << 10;
156
157 if (NumOfCachelines > 10 * MegabyteCachelines) {
158 Unit = "MB";
159 return NumOfCachelines / MegabyteCachelines;
160 } else if (NumOfCachelines > 10 * KilobyteCachelines) {
161 Unit = "KB";
162 return NumOfCachelines / KilobyteCachelines;
163 } else {
164 Unit = "Bytes";
165 return NumOfCachelines * CacheLineSize;
166 }
167}
168
Derek Bruening88639852016-05-25 02:04:04 +0000169int finalizeWorkingSet() {
Derek Bruening07814762016-06-03 16:14:07 +0000170 if (getFlags()->record_snapshots)
171 Thread.joinThread();
172
Derek Bruening8ef3f0f2016-05-31 13:41:07 +0000173 // Get the working set size for the entire execution.
174 u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
175 const char *Unit;
176 u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
177 Report(" %s: the total working set size: %u %s (%u cache lines)\n",
178 SanitizerToolName, Size, Unit, NumOfCachelines);
Derek Bruening88639852016-05-25 02:04:04 +0000179 return 0;
180}
181
182} // namespace __esan